https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112776
Bug ID: 112776
Summary: RISC-V Regression: Missed optimization of VSETVL PASS
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
Assignee: unassigned at gcc dot gnu.org
Reporter: juzhe.zhong at rivai dot ai
Target Milestone: ---

#include "riscv_vector.h"

void foo_vec(float *r, const float *x)
{
    int i, k;

    vfloat32m4_t x_vec;
    vfloat32m4_t x_forward_vec;
    vfloat32m4_t temp_vec;
    /**
     * I have to use m1 to comply with the intrinsic
     */
    vfloat32m1_t dst_vec;
    vfloat32m1_t src_vec;

    float result = 0.0f;
    float shift_prev = 0.0f;

    size_t n = 64;
    for(size_t vl; n>0; n -=vl){
        vl = __riscv_vsetvl_e32m4(n); //LMUL=4
        x_vec = __riscv_vle32_v_f32m4(&x[0], vl);
        x_forward_vec = __riscv_vle32_v_f32m4(&x[0], vl);
        temp_vec = __riscv_vfmul_vv_f32m4(x_vec, x_forward_vec, vl);
        /**
         * I have to use m1 to comply with the intrinsic
         */
        //vfloat32m1_t __riscv_vfmv_s_tu(vfloat32m1_t vd, float rs1, size_t vl);
        src_vec = __riscv_vfmv_s_tu(src_vec, 0.0f, vl); //initial src_vec
        //dst_vec = __riscv_vfmv_s_f_f32m1_tu(dst_vec, 0.0f, vl); //clean for vfredosum
        dst_vec = __riscv_vfmv_s_tu(dst_vec, 0.0f, vl); //clean for vfredosum
        dst_vec = __riscv_vfredosum_tu(dst_vec, temp_vec, src_vec, vl);
        r[0] = __riscv_vfmv_f_s_f32m1_f32(dst_vec);
    }
}

ASM:

GCC-14
foo_vec:
        li      a4,64
.L2:
        vsetvli a5,a4,e8,m1,ta,ma  --->
        vsetvli zero,a5,e32,m1,tu,ma
        vmv.s.x v2,zero
        vmv.s.x v1,zero
        vsetvli zero,a5,e32,m4,tu,ma
        vle32.v v4,0(a1)
        vfmul.vv        v4,v4,v4
        vfredosum.vs    v1,v4,v2
        vfmv.f.s        fa5,v1
        fsw     fa5,0(a0)
        sub     a4,a4,a5
        bne     a4,zero,.L2
        ret

GCC-13:
foo_vec(float*, float const*):
        fmv.s.x fa5,zero
        li      a4,64
.L2:
        vsetvli a5,a4,e32,m4,ta,ma
        vle32.v v28,0(a1)
        vfmv.s.f        v25,fa5
        vfmul.vv        v28,v28,v28
        vfmv.s.f        v24,fa5
        sub     a4,a4,a5
        vfredosum.vs    v24,v28,v25
        vfmv.f.s        fa4,v24
        fsw     fa4,0(a0)
        bne     a4,zero,.L2
        ret