committed to trunk
On Mon, Jan 6, 2025 at 6:06 PM Kito Cheng <kito.ch...@sifive.com> wrote: > > `.MASK_LEN_FOLD_LEFT_PLUS` (or `mask_len_fold_left_plus_m`) expects the > return value to be the start value even if the length is 0. > > However, the current code gen in the RISC-V backend does not meet that semantic; it will > result in a random garbage value if the length is 0. > > Take as an example the current code gen for MASK_LEN_FOLD_LEFT_PLUS with f64: > # _148 = .MASK_LEN_FOLD_LEFT_PLUS (stmp__148.33_134, vect__70.32_138, > { -1, ... }, loop_len_161, 0); > vsetvli zero,a5,e64,m1,ta,ma > vfmv.s.f v2,fa5 # insn 1 > vfredosum.vs v1,v1,v2 # insn 2 > vfmv.f.s fa5,v1 # insn 3 > > insn 1: > - vfmv.s.f won't do anything if VL=0, which means v2 will contain a garbage > value. > insn 2: > - vfredosum.vs won't do anything if VL=0, and keeps vd unchanged even with TA. > (the v-spec says: `If vl=0, no operation is performed and the destination register > is not updated.`) > insn 3: > - vfmv.f.s will move the value from v1 even if VL=0, so this is safe. > > So how do we fix that? We need two fixes: > > 1. insn 1: must always execute with VL=1, so that we can guarantee it will > always work as expected. > 2. insn 2: Add new patterns to force `vd` to use the same reg as `vs1` (start value) > for > all reduction patterns, then we can guarantee vd[0] will contain > the > start value when vl=0. > > For 1, it's just a simple change to riscv_vector::expand_reduction, but for 2, > we have to add _VL0_SAFE variant reductions to force `vd` to use the same reg as `vs1` > (start value). > > Changes since V3: > - Rename _AV to _VL0_SAFE for readability. > - Use non-VL0_SAFE version if VL is const or VLMAX. > - Only force VL=1 for vfmv.s.f when VL is non-const and non-VLMAX. > - Two more testcases. > > gcc/ChangeLog: > > * config/riscv/autovec-opt.md (*widen_reduc_plus_scal_<mode>): Adjust > argument for expand_reduction. > (*widen_reduc_plus_scal_<mode>): Ditto. > (*fold_left_widen_plus_<mode>): Ditto. > (*mask_len_fold_left_widen_plus_<mode>): Ditto. 
> (*cond_widen_reduc_plus_scal_<mode>): Ditto. > (*cond_len_widen_reduc_plus_scal_<mode>): Ditto. > (*cond_widen_reduc_plus_scal_<mode>): Ditto. > * config/riscv/autovec.md (reduc_plus_scal_<mode>): Adjust argument > for > expand_reduction. > (reduc_smax_scal_<mode>): Ditto. > (reduc_umax_scal_<mode>): Ditto. > (reduc_smin_scal_<mode>): Ditto. > (reduc_umin_scal_<mode>): Ditto. > (reduc_and_scal_<mode>): Ditto. > (reduc_ior_scal_<mode>): Ditto. > (reduc_xor_scal_<mode>): Ditto. > (reduc_plus_scal_<mode>): Ditto. > (reduc_smax_scal_<mode>): Ditto. > (reduc_smin_scal_<mode>): Ditto. > (reduc_fmax_scal_<mode>): Ditto. > (reduc_fmin_scal_<mode>): Ditto. > (fold_left_plus_<mode>): Ditto. > (mask_len_fold_left_plus_<mode>): Ditto. > * config/riscv/riscv-v.cc (expand_reduction): Add one more > argument for reduction code for vl0-safe. > * config/riscv/riscv-protos.h (expand_reduction): Ditto. > * config/riscv/vector-iterators.md (unspec): Add _VL0_SAFE variant of > reduction. > (ANY_REDUC_VL0_SAFE): New. > (ANY_WREDUC_VL0_SAFE): Ditto. > (ANY_FREDUC_VL0_SAFE): Ditto. > (ANY_FREDUC_SUM_VL0_SAFE): Ditto. > (ANY_FWREDUC_SUM_VL0_SAFE): Ditto. > (reduc_op): Add _VL0_SAFE variant of reduction. > (order): Ditto. > * config/riscv/vector.md (@pred_<reduc_op><mode>): New. > > gcc/testsuite/ChangeLog: > > * gfortran.target/riscv/rvv/pr118182.f: New. > * gcc.target/riscv/rvv/autovec/pr118182-1.c: New. > * gcc.target/riscv/rvv/autovec/pr118182-2.c: New. 
> --- > gcc/config/riscv/autovec-opt.md | 10 +- > gcc/config/riscv/autovec.md | 51 +++++-- > gcc/config/riscv/riscv-protos.h | 2 +- > gcc/config/riscv/riscv-v.cc | 25 +++- > gcc/config/riscv/vector-iterators.md | 59 ++++++++ > gcc/config/riscv/vector.md | 133 +++++++++++++++++- > .../gcc.target/riscv/rvv/autovec/pr118182-1.c | 28 ++++ > .../gcc.target/riscv/rvv/autovec/pr118182-2.c | 27 ++++ > .../gfortran.target/riscv/rvv/pr118182.f | 63 +++++++++ > 9 files changed, 375 insertions(+), 23 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c > create mode 100644 gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f > > diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md > index d7190725adb..53431863441 100644 > --- a/gcc/config/riscv/autovec-opt.md > +++ b/gcc/config/riscv/autovec-opt.md > @@ -810,7 +810,8 @@ (define_insn_and_split "*widen_reduc_plus_scal_<mode>" > "&& 1" > [(const_int 0)] > { > - riscv_vector::expand_reduction (<WREDUC_UNSPEC>, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (<WREDUC_UNSPEC>, <WREDUC_UNSPEC_VL0_SAFE>, > + riscv_vector::REDUCE_OP, > operands, > CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); > DONE; > @@ -830,6 +831,7 @@ (define_insn_and_split "*widen_reduc_plus_scal_<mode>" > [(const_int 0)] > { > riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, > + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_FRM_DYN, > operands, > CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); > @@ -851,6 +853,7 @@ (define_insn_and_split "*fold_left_widen_plus_<mode>" > [(const_int 0)] > { > riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, > + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_FRM_DYN, > operands, operands[2]); > DONE; > @@ -879,6 +882,7 @@ (define_insn_and_split > "*mask_len_fold_left_widen_plus_<mode>" > { > rtx ops[] = {operands[0], operands[2], operands[3], 
operands[4]}; > riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED, > + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_M_FRM_DYN, > ops, operands[1]); > } > @@ -1227,6 +1231,7 @@ (define_insn_and_split > "*cond_widen_reduc_plus_scal_<mode>" > rtx ops[] = {operands[0], operands[2], operands[1], > gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; > riscv_vector::expand_reduction (<WREDUC_UNSPEC>, > + <WREDUC_UNSPEC_VL0_SAFE>, > riscv_vector::REDUCE_OP_M, > ops, CONST0_RTX > (<V_DOUBLE_EXTEND_VEL>mode)); > DONE; > @@ -1282,6 +1287,7 @@ (define_insn_and_split > "*cond_len_widen_reduc_plus_scal_<mode>" > { > rtx ops[] = {operands[0], operands[3], operands[1], operands[2]}; > riscv_vector::expand_reduction (<WREDUC_UNSPEC>, > + <WREDUC_UNSPEC_VL0_SAFE>, > riscv_vector::REDUCE_OP_M, > ops, CONST0_RTX > (<V_DOUBLE_EXTEND_VEL>mode)); > DONE; > @@ -1318,6 +1324,7 @@ (define_insn_and_split > "*cond_widen_reduc_plus_scal_<mode>" > rtx ops[] = {operands[0], operands[2], operands[1], > gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; > riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, > + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_M_FRM_DYN, > ops, CONST0_RTX > (<V_DOUBLE_EXTEND_VEL>mode)); > DONE; > @@ -1373,6 +1380,7 @@ (define_insn_and_split > "*cond_len_widen_reduc_plus_scal_<mode>" > { > rtx ops[] = {operands[0], operands[3], operands[1], operands[2]}; > riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, > + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_M_FRM_DYN, > ops, CONST0_RTX > (<V_DOUBLE_EXTEND_VEL>mode)); > DONE; > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md > index 8d22b5f9c59..9bd59bd5665 100644 > --- a/gcc/config/riscv/autovec.md > +++ b/gcc/config/riscv/autovec.md > @@ -2185,7 +2185,9 @@ (define_insn_and_split "reduc_plus_scal_<mode>" > "&& 1" > [(const_int 0)] > { > - riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, riscv_vector::REDUCE_OP, > + 
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, > + UNSPEC_REDUC_SUM_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, CONST0_RTX (<VEL>mode)); > DONE; > } > @@ -2198,7 +2200,9 @@ (define_expand "reduc_smax_scal_<mode>" > { > int prec = GET_MODE_PRECISION (<VEL>mode); > rtx min = immed_wide_int_const (wi::min_value (prec, SIGNED), <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, > + UNSPEC_REDUC_MAX_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, min); > DONE; > }) > @@ -2208,7 +2212,9 @@ (define_expand "reduc_umax_scal_<mode>" > (match_operand:V_VLSI 1 "register_operand")] > "TARGET_VECTOR" > { > - riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU, > + UNSPEC_REDUC_MAXU_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, CONST0_RTX (<VEL>mode)); > DONE; > }) > @@ -2220,7 +2226,9 @@ (define_expand "reduc_smin_scal_<mode>" > { > int prec = GET_MODE_PRECISION (<VEL>mode); > rtx max = immed_wide_int_const (wi::max_value (prec, SIGNED), <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, > + UNSPEC_REDUC_MIN_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, max); > DONE; > }) > @@ -2232,7 +2240,9 @@ (define_expand "reduc_umin_scal_<mode>" > { > int prec = GET_MODE_PRECISION (<VEL>mode); > rtx max = immed_wide_int_const (wi::max_value (prec, UNSIGNED), <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MINU, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MINU, > + UNSPEC_REDUC_MINU_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, max); > DONE; > }) > @@ -2242,7 +2252,9 @@ (define_expand "reduc_and_scal_<mode>" > (match_operand:V_VLSI 1 "register_operand")] > "TARGET_VECTOR" > { > - riscv_vector::expand_reduction (UNSPEC_REDUC_AND, riscv_vector::REDUCE_OP, > + 
riscv_vector::expand_reduction (UNSPEC_REDUC_AND, > + UNSPEC_REDUC_AND_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, CONSTM1_RTX (<VEL>mode)); > DONE; > }) > @@ -2252,7 +2264,9 @@ (define_expand "reduc_ior_scal_<mode>" > (match_operand:V_VLSI 1 "register_operand")] > "TARGET_VECTOR" > { > - riscv_vector::expand_reduction (UNSPEC_REDUC_OR, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_OR, > + UNSPEC_REDUC_OR_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, CONST0_RTX (<VEL>mode)); > DONE; > }) > @@ -2262,7 +2276,9 @@ (define_expand "reduc_xor_scal_<mode>" > (match_operand:V_VLSI 1 "register_operand")] > "TARGET_VECTOR" > { > - riscv_vector::expand_reduction (UNSPEC_REDUC_XOR, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_XOR, > + UNSPEC_REDUC_XOR_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, CONST0_RTX (<VEL>mode)); > DONE; > }) > @@ -2287,6 +2303,7 @@ (define_insn_and_split "reduc_plus_scal_<mode>" > [(const_int 0)] > { > riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED, > + UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_FRM_DYN, > operands, CONST0_RTX (<VEL>mode)); > DONE; > @@ -2301,7 +2318,9 @@ (define_expand "reduc_smax_scal_<mode>" > REAL_VALUE_TYPE rv; > real_inf (&rv, true); > rtx f = const_double_from_real_value (rv, <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, > + UNSPEC_REDUC_MAX_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, f); > DONE; > }) > @@ -2314,7 +2333,9 @@ (define_expand "reduc_smin_scal_<mode>" > REAL_VALUE_TYPE rv; > real_inf (&rv, false); > rtx f = const_double_from_real_value (rv, <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, > + UNSPEC_REDUC_MIN_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, f); > DONE; > }) > @@ -2327,7 +2348,9 @@ 
(define_expand "reduc_fmax_scal_<mode>" > REAL_VALUE_TYPE rv; > real_inf (&rv, true); > rtx f = const_double_from_real_value (rv, <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, > + UNSPEC_REDUC_MAX_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, f); > DONE; > }) > @@ -2340,7 +2363,9 @@ (define_expand "reduc_fmin_scal_<mode>" > REAL_VALUE_TYPE rv; > real_inf (&rv, false); > rtx f = const_double_from_real_value (rv, <VEL>mode); > - riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP, > + riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, > + UNSPEC_REDUC_MIN_VL0_SAFE, > + riscv_vector::REDUCE_OP, > operands, f); > DONE; > }) > @@ -2366,6 +2391,7 @@ (define_insn_and_split "fold_left_plus_<mode>" > { > rtx ops[] = {operands[0], operands[2]}; > riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, > + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_FRM_DYN, > ops, operands[1]); > DONE; > @@ -2393,6 +2419,7 @@ (define_insn_and_split "mask_len_fold_left_plus_<mode>" > { > rtx ops[] = {operands[0], operands[2], operands[3], operands[4]}; > riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED, > + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE, > riscv_vector::REDUCE_OP_M_FRM_DYN, > ops, operands[1]); > } > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index dd3b36d47a6..d9421c907da 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -639,7 +639,7 @@ void expand_vec_cmp (rtx, rtx_code, rtx, rtx, rtx = > nullptr, rtx = nullptr); > bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); > void expand_cond_len_unop (unsigned, rtx *); > void expand_cond_len_binop (unsigned, rtx *); > -void expand_reduction (unsigned, unsigned, rtx *, rtx); > +void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx); > void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode); > 
void expand_vec_floor (rtx, rtx, machine_mode, machine_mode); > void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode); > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 7f4ce2b0930..b8e1941b5fe 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -4504,30 +4504,47 @@ expand_cond_ternop (unsigned icode, rtx *ops) > Case 2: ops = {scalar_dest, vector_src, mask, vl} > */ > void > -expand_reduction (unsigned unspec, unsigned insn_flags, rtx *ops, rtx init) > +expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe, > + unsigned insn_flags, rtx *ops, rtx init) > { > rtx scalar_dest = ops[0]; > rtx vector_src = ops[1]; > machine_mode vmode = GET_MODE (vector_src); > machine_mode vel_mode = GET_MODE (scalar_dest); > machine_mode m1_mode = get_m1_mode (vel_mode).require (); > + rtx vl_op = NULL_RTX; > + bool need_vl0_safe = false; > + if (need_mask_operand_p (insn_flags)) > + { > + vl_op = ops[3]; > + need_vl0_safe = !CONST_INT_P (vl_op) && !CONST_POLY_INT_P (vl_op); > + } > > rtx m1_tmp = gen_reg_rtx (m1_mode); > rtx scalar_move_ops[] = {m1_tmp, init}; > insn_code icode = code_for_pred_broadcast (m1_mode); > if (need_mask_operand_p (insn_flags)) > - emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, ops[3]); > + { > + if (need_vl0_safe) > + emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, > const1_rtx); > + else > + emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, vl_op); > + } > else > emit_vlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops); > > rtx m1_tmp2 = gen_reg_rtx (m1_mode); > rtx reduc_ops[] = {m1_tmp2, vector_src, m1_tmp}; > - icode = code_for_pred (unspec, vmode); > + > + if (need_vl0_safe) > + icode = code_for_pred (unspec_for_vl0_safe, vmode); > + else > + icode = code_for_pred (unspec, vmode); > > if (need_mask_operand_p (insn_flags)) > { > rtx mask_len_reduc_ops[] = {m1_tmp2, ops[2], vector_src, m1_tmp}; > - emit_nonvlmax_insn (icode, insn_flags, 
mask_len_reduc_ops, ops[3]); > + emit_nonvlmax_insn (icode, insn_flags, mask_len_reduc_ops, vl_op); > } > else > emit_vlmax_insn (icode, insn_flags, reduc_ops); > diff --git a/gcc/config/riscv/vector-iterators.md > b/gcc/config/riscv/vector-iterators.md > index dfab0bbf014..3c8da66cebf 100644 > --- a/gcc/config/riscv/vector-iterators.md > +++ b/gcc/config/riscv/vector-iterators.md > @@ -88,8 +88,11 @@ (define_c_enum "unspec" [ > ;; Integer and Float Reduction > UNSPEC_REDUC > UNSPEC_REDUC_SUM > + UNSPEC_REDUC_SUM_VL0_SAFE > UNSPEC_REDUC_SUM_ORDERED > UNSPEC_REDUC_SUM_UNORDERED > + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE > + UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE > UNSPEC_REDUC_MAXU > UNSPEC_REDUC_MAX > UNSPEC_REDUC_MINU > @@ -97,11 +100,22 @@ (define_c_enum "unspec" [ > UNSPEC_REDUC_AND > UNSPEC_REDUC_OR > UNSPEC_REDUC_XOR > + UNSPEC_REDUC_MAXU_VL0_SAFE > + UNSPEC_REDUC_MAX_VL0_SAFE > + UNSPEC_REDUC_MINU_VL0_SAFE > + UNSPEC_REDUC_MIN_VL0_SAFE > + UNSPEC_REDUC_AND_VL0_SAFE > + UNSPEC_REDUC_OR_VL0_SAFE > + UNSPEC_REDUC_XOR_VL0_SAFE > > UNSPEC_WREDUC_SUM > UNSPEC_WREDUC_SUMU > + UNSPEC_WREDUC_SUM_VL0_SAFE > + UNSPEC_WREDUC_SUMU_VL0_SAFE > UNSPEC_WREDUC_SUM_ORDERED > UNSPEC_WREDUC_SUM_UNORDERED > + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE > + UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE > UNSPEC_SELECT_MASK > > UNSPEC_SF_VFNRCLIP > @@ -1665,32 +1679,75 @@ (define_int_iterator ANY_REDUC [ > UNSPEC_REDUC_MIN UNSPEC_REDUC_AND UNSPEC_REDUC_OR UNSPEC_REDUC_XOR > ]) > > +(define_int_iterator ANY_REDUC_VL0_SAFE [ > + UNSPEC_REDUC_SUM_VL0_SAFE UNSPEC_REDUC_MAXU_VL0_SAFE > UNSPEC_REDUC_MAX_VL0_SAFE UNSPEC_REDUC_MINU_VL0_SAFE > + UNSPEC_REDUC_MIN_VL0_SAFE UNSPEC_REDUC_AND_VL0_SAFE > UNSPEC_REDUC_OR_VL0_SAFE UNSPEC_REDUC_XOR_VL0_SAFE > +]) > + > (define_int_iterator ANY_WREDUC [ > UNSPEC_WREDUC_SUM UNSPEC_WREDUC_SUMU > ]) > > +(define_int_iterator ANY_WREDUC_VL0_SAFE [ > + UNSPEC_WREDUC_SUM_VL0_SAFE UNSPEC_WREDUC_SUMU_VL0_SAFE > +]) > + > (define_int_iterator ANY_FREDUC [ > UNSPEC_REDUC_MAX 
UNSPEC_REDUC_MIN > ]) > > +(define_int_iterator ANY_FREDUC_VL0_SAFE [ > + UNSPEC_REDUC_MAX_VL0_SAFE UNSPEC_REDUC_MIN_VL0_SAFE > +]) > + > (define_int_iterator ANY_FREDUC_SUM [ > UNSPEC_REDUC_SUM_ORDERED UNSPEC_REDUC_SUM_UNORDERED > ]) > > +(define_int_iterator ANY_FREDUC_SUM_VL0_SAFE [ > + UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE > +]) > + > (define_int_iterator ANY_FWREDUC_SUM [ > UNSPEC_WREDUC_SUM_ORDERED UNSPEC_WREDUC_SUM_UNORDERED > ]) > > +(define_int_iterator ANY_FWREDUC_SUM_VL0_SAFE [ > + UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE > +]) > + > +(define_int_attr reduc_op_pat_name [ > + (UNSPEC_REDUC_SUM "redsum") > + (UNSPEC_REDUC_SUM_VL0_SAFE "redsum_vl0s") > + (UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum") > + (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "redosum_vl0s") > (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "redusum_vl0s") > + (UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") > (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin") > + (UNSPEC_REDUC_MAXU_VL0_SAFE "redmaxu_vl0s") (UNSPEC_REDUC_MAX_VL0_SAFE > "redmax_vl0s") (UNSPEC_REDUC_MINU_VL0_SAFE "redminu_vl0s") > (UNSPEC_REDUC_MIN_VL0_SAFE "redmin_vl0s") > + (UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR > "redxor") > + (UNSPEC_REDUC_AND_VL0_SAFE "redand_vl0s") (UNSPEC_REDUC_OR_VL0_SAFE > "redor_vl0s") (UNSPEC_REDUC_XOR_VL0_SAFE "redxor_vl0s") > + (UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu") > + (UNSPEC_WREDUC_SUM_VL0_SAFE "wredsum_vl0s") (UNSPEC_WREDUC_SUMU_VL0_SAFE > "wredsumu_vl0s") > + (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED > "wredusum") > + (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "wredosum_vl0s") > (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "wredusum_vl0s") > +]) > + > (define_int_attr reduc_op [ > (UNSPEC_REDUC_SUM "redsum") > + (UNSPEC_REDUC_SUM_VL0_SAFE "redsum") > (UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum") > + 
(UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "redosum") > (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "redusum") > (UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") > (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin") > + (UNSPEC_REDUC_MAXU_VL0_SAFE "redmaxu") (UNSPEC_REDUC_MAX_VL0_SAFE > "redmax") (UNSPEC_REDUC_MINU_VL0_SAFE "redminu") (UNSPEC_REDUC_MIN_VL0_SAFE > "redmin") > (UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR > "redxor") > + (UNSPEC_REDUC_AND_VL0_SAFE "redand") (UNSPEC_REDUC_OR_VL0_SAFE "redor") > (UNSPEC_REDUC_XOR_VL0_SAFE "redxor") > (UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu") > + (UNSPEC_WREDUC_SUM_VL0_SAFE "wredsum") (UNSPEC_WREDUC_SUMU_VL0_SAFE > "wredsumu") > (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED > "wredusum") > + (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "wredosum") > (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "wredusum") > ]) > > (define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") > (sign_extend "UNSPEC_WREDUC_SUM")]) > +(define_code_attr WREDUC_UNSPEC_VL0_SAFE [(zero_extend > "UNSPEC_WREDUC_SUMU_VL0_SAFE") (sign_extend "UNSPEC_WREDUC_SUM_VL0_SAFE")]) > > (define_mode_attr VINDEX [ > (RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI > "RVVM1QI") > @@ -3930,6 +3987,8 @@ (define_int_attr order [ > (UNSPEC_ORDERED "o") (UNSPEC_UNORDERED "u") > (UNSPEC_REDUC_SUM_ORDERED "o") (UNSPEC_REDUC_SUM_UNORDERED "u") > (UNSPEC_WREDUC_SUM_ORDERED "o") (UNSPEC_WREDUC_SUM_UNORDERED "u") > + (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "o") > (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "u") > + (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "o") > (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "u") > ]) > > (define_int_attr v_su [(UNSPEC_VMULHS "") (UNSPEC_VMULHU "u") > (UNSPEC_VMULHSU "su") > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index e78d1090696..4658db2653f 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -7745,9 +7745,14 @@ 
(define_insn "@pred_rod_trunc<mode>" > ;; - 14.3 Vector Single-Width Floating-Point Reduction Instructions > ;; - 14.4 Vector Widening Floating-Point Reduction Instructions > ;; > ------------------------------------------------------------------------------- > +;; > +;; NOTE for VL0 safe variantreduction: > +;; The VL0 safe variantis used by the auto vectorizer to generate > vectorized code > +;; only, because the auto vectorizer expect reduction should propgat the > start > +;; value to dest even VL=0, the only way is force vd=vs1 by constraint. > > ;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs) > -(define_insn "@pred_<reduc_op><mode>" > +(define_insn "@pred_<reduc_op_pat_name><mode>" > [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, > vr") > (unspec:<V_LMUL1> > [(unspec:<VM> > @@ -7767,8 +7772,30 @@ (define_insn "@pred_<reduc_op><mode>" > [(set_attr "type" "vired") > (set_attr "mode" "<MODE>")]) > > +;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs) > +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for > detail) > +(define_insn "@pred_<reduc_op_pat_name><mode>" > + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") > + (unspec:<V_LMUL1> > + [(unspec:<VM> > + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") > + (match_operand 5 "vector_length_operand" " rK") > + (match_operand 6 "const_int_operand" " i") > + (match_operand 7 "const_int_operand" " i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:<V_LMUL1> [ > + (match_operand:V_VLSI 3 "register_operand" " vr") > + (match_operand:<V_LMUL1> 4 "register_operand" " 0") > + ] ANY_REDUC_VL0_SAFE) > + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] > UNSPEC_REDUC))] > + "TARGET_VECTOR" > + "v<reduc_op>.vs\t%0,%3,%4%p1" > + [(set_attr "type" "vired") > + (set_attr "mode" "<MODE>")]) > + > ;; Integer Widen Reduction Sum (vwredsum[u].vs) > -(define_insn "@pred_<reduc_op><mode>" > +(define_insn 
"@pred_<reduc_op_pat_name><mode>" > [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, > vr") > (unspec:<V_EXT_LMUL1> > [(unspec:<VM> > @@ -7788,8 +7815,30 @@ (define_insn "@pred_<reduc_op><mode>" > [(set_attr "type" "viwred") > (set_attr "mode" "<MODE>")]) > > +;; Integer Widen Reduction Sum (vwredsum[u].vs) > +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for > detail) > +(define_insn "@pred_<reduc_op_pat_name><mode>" > + [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr") > + (unspec:<V_EXT_LMUL1> > + [(unspec:<VM> > + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") > + (match_operand 5 "vector_length_operand" " rK") > + (match_operand 6 "const_int_operand" " i") > + (match_operand 7 "const_int_operand" " i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:<V_EXT_LMUL1> [ > + (match_operand:VI_QHS 3 "register_operand" " vr") > + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0") > + ] ANY_WREDUC_VL0_SAFE) > + (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " > vu")] UNSPEC_REDUC))] > + "TARGET_VECTOR" > + "v<reduc_op>.vs\t%0,%3,%4%p1" > + [(set_attr "type" "viwred") > + (set_attr "mode" "<MODE>")]) > + > ;; Float Reduction (vfred(max|min).vs) > -(define_insn "@pred_<reduc_op><mode>" > +(define_insn "@pred_<reduc_op_pat_name><mode>" > [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, > vr") > (unspec:<V_LMUL1> > [(unspec:<VM> > @@ -7809,8 +7858,30 @@ (define_insn "@pred_<reduc_op><mode>" > [(set_attr "type" "vfredu") > (set_attr "mode" "<MODE>")]) > > +;; Float Reduction (vfred(max|min).vs) > +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for > detail) > +(define_insn "@pred_<reduc_op_pat_name><mode>" > + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") > + (unspec:<V_LMUL1> > + [(unspec:<VM> > + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") > + (match_operand 5 "vector_length_operand" " rK") > + (match_operand 6 
"const_int_operand" " i") > + (match_operand 7 "const_int_operand" " i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:<V_LMUL1> [ > + (match_operand:V_VLSF 3 "register_operand" " vr") > + (match_operand:<V_LMUL1> 4 "register_operand" " 0") > + ] ANY_FREDUC_VL0_SAFE) > + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] > UNSPEC_REDUC))] > + "TARGET_VECTOR" > + "vf<reduc_op>.vs\t%0,%3,%4%p1" > + [(set_attr "type" "vfredu") > + (set_attr "mode" "<MODE>")]) > + > ;; Float Reduction Sum (vfred[ou]sum.vs) > -(define_insn "@pred_<reduc_op><mode>" > +(define_insn "@pred_<reduc_op_pat_name><mode>" > [(set (match_operand:<V_LMUL1> 0 "register_operand" > "=vr,vr") > (unspec:<V_LMUL1> > [(unspec:<VM> > @@ -7834,8 +7905,34 @@ (define_insn "@pred_<reduc_op><mode>" > (set (attr "frm_mode") > (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) > > +;; Float Reduction Sum (vfred[ou]sum.vs) > +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for > detail) > +(define_insn "@pred_<reduc_op_pat_name><mode>" > + [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr") > + (unspec:<V_LMUL1> > + [(unspec:<VM> > + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") > + (match_operand 5 "vector_length_operand" " rK") > + (match_operand 6 "const_int_operand" " i") > + (match_operand 7 "const_int_operand" " i") > + (match_operand 8 "const_int_operand" " i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM) > + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:<V_LMUL1> [ > + (match_operand:V_VLSF 3 "register_operand" " vr") > + (match_operand:<V_LMUL1> 4 "register_operand" " 0") > + ] ANY_FREDUC_SUM_VL0_SAFE) > + (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] > UNSPEC_REDUC))] > + "TARGET_VECTOR" > + "vf<reduc_op>.vs\t%0,%3,%4%p1" > + [(set_attr "type" "vfred<order>") > + (set_attr "mode" "<MODE>") > + (set (attr "frm_mode") > + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) > + > ;; Float 
Widen Reduction Sum (vfwred[ou]sum.vs) > -(define_insn "@pred_<reduc_op><mode>" > +(define_insn "@pred_<reduc_op_pat_name><mode>" > [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, > vr") > (unspec:<V_EXT_LMUL1> > [(unspec:<VM> > @@ -7859,6 +7956,32 @@ (define_insn "@pred_<reduc_op><mode>" > (set (attr "frm_mode") > (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) > > +;; Float Widen Reduction Sum (vfwred[ou]sum.vs) > +;; but for auto vectorizer (see "NOTE for VL0 safe variantreduction" for > detail) > +(define_insn "@pred_<reduc_op_pat_name><mode>" > + [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr") > + (unspec:<V_EXT_LMUL1> > + [(unspec:<VM> > + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") > + (match_operand 5 "vector_length_operand" " rK") > + (match_operand 6 "const_int_operand" " i") > + (match_operand 7 "const_int_operand" " i") > + (match_operand 8 "const_int_operand" " i") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM) > + (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:<V_EXT_LMUL1> [ > + (match_operand:VF_HS 3 "register_operand" " vr") > + (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0") > + ] ANY_FWREDUC_SUM_VL0_SAFE) > + (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " > vu")] UNSPEC_REDUC))] > + "TARGET_VECTOR" > + "vf<reduc_op>.vs\t%0,%3,%4%p1" > + [(set_attr "type" "vfwred<order>") > + (set_attr "mode" "<MODE>") > + (set (attr "frm_mode") > + (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))]) > + > ;; > ------------------------------------------------------------------------------- > ;; ---- Predicated permutation operations > ;; > ------------------------------------------------------------------------------- > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c > new file mode 100644 > index 00000000000..1ab17245ba9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-1.c > @@ 
-0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -fno-vect-cost-model -O3 -mabi=lp64d" } */ > +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ > + > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +/* > +** f1: > +** ... > +** vsetivli zero,1,.* > +** ... > +** vfmv.s.f .* > +** ... > +** vsetvli zero,.* > +** ... > +** vfredosum.vs .* > +** ... > +** vfmv.f.s .* > +** ... > +*/ > + > +float f1(float *arr, int n) > +{ > + float sum = 0; > + for (int i = 0; i < n; i++) > + sum += arr[i]; > + return sum; > +} > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c > new file mode 100644 > index 00000000000..619d757a14f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118182-2.c > @@ -0,0 +1,27 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -fno-vect-cost-model -O3 -mabi=lp64d > -ffast-math" } */ > +/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */ > + > +/* { dg-final { check-function-bodies "**" "" } } */ > + > +/* > +** f1: > +** ... > +** vsetvli [ast][0-9]+,zero,.* > +** ... > +** vmv.s.x .* > +** ... > +** vfredusum.vs .* > +** ... > +** vfmv.f.s .* > +** ... > +*/ > + > +float f1(float *arr, int n) > +{ > + float sum = 0; > + for (int i = 0; i < n; i++) > + sum += arr[i]; > + return sum; > +} > +/* { dg-final { scan-assembler-not {\tvsetivli\tzero,1,.*} } } */ > diff --git a/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f > b/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f > new file mode 100644 > index 00000000000..7ecbfeb863b > --- /dev/null > +++ b/gcc/testsuite/gfortran.target/riscv/rvv/pr118182.f > @@ -0,0 +1,63 @@ > +! { dg-do run } > +! { dg-options "-fno-vect-cost-model" } > + > + program dqnorm_calculator > + implicit none > + > + ! 
Declare variables > + integer, parameter :: nx = 33, ny = 33, nz =16 > + real(8) :: dq(5, nx, ny, nz) > + real(8) :: result, expected_result, tolerance > + integer :: i, j, k, l > + > + ! Initialize the dq array with values calculated as k + j + i + 5 > + do k = 1, nz > + do j = 1, ny > + do i = 1, nx > + do l = 1, 5 > + dq(l, i, j, k) = k + j + i + 5 > + end do > + end do > + end do > + end do > + > + ! Call the subroutine to calculate the norm > + call redsum(dq, nx, ny, nz, result) > + > + ! Check the result > + expected_result = 214213560.0d0 > + tolerance = 0.0001d0 > + if (abs(result - expected_result) > tolerance) then > + print *, "Result is incorrect: ", result > + call abort() > + end if > + end > + > + subroutine redsum(dq, nx, ny, nz, result) > + implicit none > + > + ! Declare arguments and local variables > + integer, intent(in) :: nx, ny, nz > + real(8), intent(in) :: dq(5, nx, ny, nz) > + real(8), intent(out) :: result > + real(8) :: dqnorm > + integer :: i, j, k, l > + > + ! Initialize dqnorm > + dqnorm = 0.0d0 > + > + ! Compute the sum of squares of dq elements > + do k = 1, nz > + do j = 1, ny > + do i = 1, nx > + do l = 1, 5 > + dqnorm = dqnorm + dq(l, i, j, k) * dq(l, i, j, k) > + end do > + end do > + end do > + end do > + > + result = dqnorm > + > + end subroutine redsum > + > -- > 2.34.1 >