This patch supports combining a conditional extend and a reduce_sum into a conditional widening reduce_sum, i.e. it combines the following three insns: (set (reg:RVVM2HI 149) (const_vector:RVVM2HI repeat [ (const_int 0) ])) (set (reg:RVVM2HI 138) (if_then_else:RVVM2HI (reg:RVVMF8BI 135) (reg:RVVM2HI 148) (reg:RVVM2HI 149))) (set (reg:HI 150) (unspec:HI [ (reg:RVVM2HI 138) ] UNSPEC_REDUC_SUM)) into one insn: (set (reg:SI 147) (unspec:SI [ (if_then_else:RVVM2SI (reg:RVVMF16BI 135) (sign_extend:RVVM2SI (reg:RVVM1HI 136)) (const_vector:RVVM2SI repeat [ (const_int 0) ])) ] UNSPEC_REDUC_SUM))
Consider the following C code: int16_t foo (int8_t *restrict a, int8_t *restrict pred) { int16_t sum = 0; for (int i = 0; i < 16; i += 1) if (pred[i]) sum += a[i]; return sum; } assembly before this patch: foo: vsetivli zero,16,e16,m2,ta,ma li a5,0 vmv.v.i v2,0 vsetvli zero,zero,e8,m1,ta,ma vl1re8.v v0,0(a1) vmsne.vi v0,v0,0 vsetvli zero,zero,e16,m2,ta,mu vle8.v v4,0(a0),v0.t vmv.s.x v1,a5 vsext.vf2 v2,v4,v0.t vredsum.vs v2,v2,v1 vmv.x.s a0,v2 slliw a0,a0,16 sraiw a0,a0,16 ret assembly after this patch: foo: li a5,0 vsetivli zero,16,e16,m1,ta,ma vmv.s.x v3,a5 vsetivli zero,16,e8,m1,ta,ma vl1re8.v v0,0(a1) vmsne.vi v0,v0,0 vle8.v v2,0(a0),v0.t vwredsum.vs v1,v2,v3,v0.t vsetivli zero,0,e16,m1,ta,ma vmv.x.s a0,v1 slliw a0,a0,16 sraiw a0,a0,16 ret gcc/ChangeLog: * config/riscv/autovec-opt.md (*cond_widen_reduc_plus_scal_<mode>): New combine patterns. * config/riscv/autovec.md (vcond_mask_<mode><vm>): Split vcond_mask pattern into three patterns. (vec_duplicate_const_0<mode>): Ditto. (*vcond_mask_<mode><vm>): Ditto. * config/riscv/predicates.md (vector_register_or_const_0_operand): New. * config/riscv/riscv-protos.h (enum insn_type): Add REDUCE_OP_M. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c: New test. * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c: New test. 
--- gcc/config/riscv/autovec-opt.md | 48 +++++++++++++++ gcc/config/riscv/autovec.md | 59 ++++++++++++++++++- gcc/config/riscv/predicates.md | 5 ++ gcc/config/riscv/riscv-protos.h | 1 + .../rvv/autovec/cond/cond_widen_reduc-1.c | 30 ++++++++++ .../rvv/autovec/cond/cond_widen_reduc_run-1.c | 28 +++++++++ 6 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index b47bae16193..eefa4f28a0a 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1284,6 +1284,54 @@ } [(set_attr "type" "vector")]) +;; Combine mask extend + vredsum to mask vwredsum[u] +(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>" + [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand") + (unspec:<V_DOUBLE_EXTEND_VEL> [ + (if_then_else:<V_DOUBLE_EXTEND> + (match_operand:<VM> 1 "register_operand") + (any_extend:<V_DOUBLE_EXTEND> + (match_operand:VI_QHS_NO_M8 2 "register_operand")) + (match_operand:<V_DOUBLE_EXTEND> 3 "vector_const_0_operand")) + ] UNSPEC_REDUC_SUM))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx ops[] = {operands[0], operands[2], operands[1], + gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; + riscv_vector::expand_reduction (<WREDUC_UNSPEC>, + riscv_vector::REDUCE_OP_M, + ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); + DONE; +} +[(set_attr "type" "vector")]) + +;; Combine mask extend + vfredsum to mask vfwredusum +(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>" + [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand") + (unspec:<V_DOUBLE_EXTEND_VEL> [ + (if_then_else:<V_DOUBLE_EXTEND> + (match_operand:<VM> 1 "register_operand") + (float_extend:<V_DOUBLE_EXTEND> + (match_operand:VF_HS_NO_M8 2 "register_operand")) + 
(match_operand:<V_DOUBLE_EXTEND> 3 "vector_const_0_operand")) + ] UNSPEC_REDUC_SUM_UNORDERED))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx ops[] = {operands[0], operands[2], operands[1], + gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)}; + riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED, + riscv_vector::REDUCE_OP_M_FRM_DYN, + ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode)); + DONE; +} +[(set_attr "type" "vector")]) + ;; ============================================================================= ;; Misc combine patterns ;; ============================================================================= diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 493d5745485..20a71ad8ced 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -545,7 +545,64 @@ ;; - vfmerge.vf ;; ------------------------------------------------------------------------- -(define_insn_and_split "vcond_mask_<mode><vm>" +;; The purpose of splitting the original pattern into three patterns here is +;; to combine the following three insns: +;; (set (reg:RVVM2HI 149) +;; (const_vector:RVVM2HI repeat [ +;; (const_int 0) +;; ])) +;; (set (reg:RVVM2HI 138) +;; (if_then_else:RVVM2HI +;; (reg:RVVMF8BI 135) +;; (reg:RVVM2HI 148) +;; (reg:RVVM2HI 149))) +;; (set (reg:HI 150) +;; (unspec:HI [ +;; (reg:RVVM2HI 138) +;; ] UNSPEC_REDUC_SUM)) +;; +;; into one insn: +;; +;; (set (reg:SI 147) +;; (unspec:SI [ +;; (if_then_else:RVVM2SI +;; (reg:RVVMF16BI 135) +;; (sign_extend:RVVM2SI (reg:RVVM1HI 136)) +;; (const_vector:RVVM2SI repeat [ +;; (const_int 0) +;; ])) +;; ] UNSPEC_REDUC_SUM)) + +(define_expand "vcond_mask_<mode><vm>" + [(set (match_operand:V_VLS 0 "register_operand") + (if_then_else:V_VLS + (match_operand:<VM> 3 "register_operand") + (match_operand:V_VLS 1 "nonmemory_operand") + (match_operand:V_VLS 2 "vector_register_or_const_0_operand")))] + "TARGET_VECTOR" + { + if (satisfies_constraint_Wc0 
(operands[2])) + { + rtx reg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_vec_duplicate_const_0<mode> (reg, operands[2])); + operands[2] = reg; + } + }) + +(define_insn_and_split "vec_duplicate_const_0<mode>" + [(set (match_operand:V_VLS 0 "register_operand") + (match_operand:V_VLS 1 "vector_const_0_operand"))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + emit_move_insn (operands[0], operands[1]); + DONE; +} + [(set_attr "type" "vector")]) + +(define_insn_and_split "*vcond_mask_<mode><vm>" [(set (match_operand:V_VLS 0 "register_operand") (if_then_else:V_VLS (match_operand:<VM> 3 "register_operand") diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 4bc7ff2c9d8..6abf9d97958 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -463,6 +463,11 @@ (ior (match_operand 0 "register_operand") (match_code "const_vector"))) +(define_predicate "vector_register_or_const_0_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_vector") + (match_test "satisfies_constraint_Wc0 (op)")))) + (define_predicate "vector_gs_scale_operand_16" (and (match_code "const_int") (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 5a2d218d67b..fd6107ccb5c 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -337,6 +337,7 @@ enum insn_type : unsigned int /* For vreduce, no mask policy operand. 
*/ REDUCE_OP = __NORMAL_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P, + REDUCE_OP_M = __MASK_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P, REDUCE_OP_FRM_DYN = REDUCE_OP | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P, REDUCE_OP_M_FRM_DYN = __MASK_OP_TA | BINARY_OP_P | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P, diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c new file mode 100644 index 00000000000..22a71048684 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */ +#include <stdint-gcc.h> + +#define TEST_TYPE(TYPE1, TYPE2, N) \ + __attribute__ ((noipa)) \ + TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred) \ + { \ + TYPE1 sum = 0; \ + for (int i = 0; i < N; i += 1) \ + if (pred[i]) \ + sum += a[i]; \ + return sum; \ + } + +#define TEST_ALL(TEST) \ + TEST (int16_t, int8_t, 16) \ + TEST (int32_t, int16_t, 8) \ + TEST (int64_t, int32_t, 4) \ + TEST (uint16_t, uint8_t, 16) \ + TEST (uint32_t, uint16_t, 8) \ + TEST (uint64_t, uint32_t, 4) \ + TEST (float, _Float16, 8) \ + TEST (double, float, 4) + +TEST_ALL (TEST_TYPE) + +/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */ +/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */ +/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c new file mode 100644 index 00000000000..fdb7e5249ee --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c @@ -0,0 
+1,28 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */ + +#include "cond_widen_reduc-1.c" + +#define RUN(TYPE1, TYPE2, N) \ + { \ + TYPE2 a[N]; \ + TYPE2 pred[N]; \ + TYPE1 r = 0; \ + for (int i = 0; i < N; i++) \ + { \ + a[i] = (i * 0.1) * (i & 1 ? 1 : -1); \ + pred[i] = i % 3; \ + if (pred[i]) \ + r += a[i]; \ + asm volatile ("" ::: "memory"); \ + } \ + if (r != reduc_##TYPE1##_##TYPE2 (a, pred)) \ + __builtin_abort (); \ + } + +int __attribute__ ((optimize (1))) +main () +{ + TEST_ALL (RUN) + return 0; +} -- 2.36.3