This patch supports combining a conditional extend and a reduce_sum into a
conditional widening reduce_sum, e.g. combining the following three insns:
  (set (reg:RVVM2HI 149)
       (const_vector:RVVM2HI repeat [
          (const_int 0)
       ]))
  (set (reg:RVVM2HI 138)
    (if_then_else:RVVM2HI
      (reg:RVVMF8BI 135)
      (reg:RVVM2HI 148)
      (reg:RVVM2HI 149)))
  (set (reg:HI 150)
    (unspec:HI [
      (reg:RVVM2HI 138)
    ] UNSPEC_REDUC_SUM))
into one insn:
  (set (reg:SI 147)
    (unspec:SI [
      (if_then_else:RVVM2SI
        (reg:RVVMF16BI 135)
        (sign_extend:RVVM2SI (reg:RVVM1HI 136))
        (const_vector:RVVM2SI repeat [
          (const_int 0)
        ]))
    ] UNSPEC_REDUC_SUM))

Consider the following C code:

int16_t foo (int8_t *restrict a, int8_t *restrict pred)
{
  int16_t sum = 0;
  for (int i = 0; i < 16; i += 1)
    if (pred[i])
      sum += a[i];
  return sum;
}

assembly before this patch:

foo:
        vsetivli        zero,16,e16,m2,ta,ma
        li      a5,0
        vmv.v.i v2,0
        vsetvli zero,zero,e8,m1,ta,ma
        vl1re8.v        v0,0(a1)
        vmsne.vi        v0,v0,0
        vsetvli zero,zero,e16,m2,ta,mu
        vle8.v  v4,0(a0),v0.t
        vmv.s.x v1,a5
        vsext.vf2       v2,v4,v0.t
        vredsum.vs      v2,v2,v1
        vmv.x.s a0,v2
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

assembly after this patch:

foo:
        li      a5,0
        vsetivli        zero,16,e16,m1,ta,ma
        vmv.s.x v3,a5
        vsetivli        zero,16,e8,m1,ta,ma
        vl1re8.v        v0,0(a1)
        vmsne.vi        v0,v0,0
        vle8.v  v2,0(a0),v0.t
        vwredsum.vs     v1,v2,v3,v0.t
        vsetivli        zero,0,e16,m1,ta,ma
        vmv.x.s a0,v1
        slliw   a0,a0,16
        sraiw   a0,a0,16
        ret

gcc/ChangeLog:

        * config/riscv/autovec-opt.md (*cond_widen_reduc_plus_scal_<mode>):
        New combine patterns.
        * config/riscv/autovec.md (vcond_mask_<mode><vm>):
        Split vcond_mask pattern into three patterns.
        (vec_duplicate_const_0<mode>): Ditto.
        (*vcond_mask_<mode><vm>): Ditto.
        * config/riscv/predicates.md (vector_register_or_const_0_operand): New.
        * config/riscv/riscv-protos.h (enum insn_type): Add REDUCE_OP_M.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c: New test.
        * gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c: New test.

---
 gcc/config/riscv/autovec-opt.md               | 48 +++++++++++++++
 gcc/config/riscv/autovec.md                   | 59 ++++++++++++++++++-
 gcc/config/riscv/predicates.md                |  5 ++
 gcc/config/riscv/riscv-protos.h               |  1 +
 .../rvv/autovec/cond/cond_widen_reduc-1.c     | 30 ++++++++++
 .../rvv/autovec/cond/cond_widen_reduc_run-1.c | 28 +++++++++
 6 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index b47bae16193..eefa4f28a0a 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1284,6 +1284,54 @@
 }
 [(set_attr "type" "vector")])

+;; Combine mask extend + vredsum to mask vwredsum[u]
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (match_operand:<VM> 1 "register_operand")
+            (any_extend:<V_DOUBLE_EXTEND>
+              (match_operand:VI_QHS_NO_M8 2 "register_operand"))
+            (match_operand:<V_DOUBLE_EXTEND> 3 "vector_const_0_operand"))
+        ] UNSPEC_REDUC_SUM))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[2], operands[1],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+                                  riscv_vector::REDUCE_OP_M,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine mask extend + vfredusum to mask vfwredusum
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+  [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+        (unspec:<V_DOUBLE_EXTEND_VEL> [
+          (if_then_else:<V_DOUBLE_EXTEND>
+            (match_operand:<VM> 1 "register_operand")
+            (float_extend:<V_DOUBLE_EXTEND>
+              (match_operand:VF_HS_NO_M8 2 "register_operand"))
+            (match_operand:<V_DOUBLE_EXTEND> 3 "vector_const_0_operand"))
+        ] UNSPEC_REDUC_SUM_UNORDERED))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx ops[] = {operands[0], operands[2], operands[1],
+               gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+  riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+                                  riscv_vector::REDUCE_OP_M_FRM_DYN,
+                                  ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+  DONE;
+}
+[(set_attr "type" "vector")])
+
 ;; =============================================================================
 ;; Misc combine patterns
 ;; =============================================================================
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 493d5745485..20a71ad8ced 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -545,7 +545,64 @@
 ;; - vfmerge.vf
 ;; -------------------------------------------------------------------------

-(define_insn_and_split "vcond_mask_<mode><vm>"
+;; The purpose of splitting the original pattern into three patterns here is
+;; to combine the following three insns:
+;;   (set (reg:RVVM2HI 149)
+;;        (const_vector:RVVM2HI repeat [
+;;           (const_int 0)
+;;        ]))
+;;   (set (reg:RVVM2HI 138)
+;;     (if_then_else:RVVM2HI
+;;       (reg:RVVMF8BI 135)
+;;       (reg:RVVM2HI 148)
+;;       (reg:RVVM2HI 149)))
+;;   (set (reg:HI 150)
+;;     (unspec:HI [
+;;       (reg:RVVM2HI 138)
+;;     ] UNSPEC_REDUC_SUM))
+;;
+;; into one insn:
+;;
+;;   (set (reg:SI 147)
+;;     (unspec:SI [
+;;       (if_then_else:RVVM2SI
+;;         (reg:RVVMF16BI 135)
+;;         (sign_extend:RVVM2SI (reg:RVVM1HI 136))
+;;         (const_vector:RVVM2SI repeat [
+;;           (const_int 0)
+;;         ]))
+;;     ] UNSPEC_REDUC_SUM))
+
+(define_expand "vcond_mask_<mode><vm>"
+  [(set (match_operand:V_VLS 0 "register_operand")
+        (if_then_else:V_VLS
+          (match_operand:<VM> 3 "register_operand")
+          (match_operand:V_VLS 1 "nonmemory_operand")
+          (match_operand:V_VLS 2 "vector_register_or_const_0_operand")))]
+  "TARGET_VECTOR"
+  {
+    if (satisfies_constraint_Wc0 (operands[2]))
+      {
+        rtx reg = gen_reg_rtx (<MODE>mode);
+        emit_insn (gen_vec_duplicate_const_0<mode> (reg, operands[2]));
+        operands[2] = reg;
+      }
+  })
+
+(define_insn_and_split "vec_duplicate_const_0<mode>"
+  [(set (match_operand:V_VLS 0 "register_operand")
+        (match_operand:V_VLS 1 "vector_const_0_operand"))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  emit_move_insn (operands[0], operands[1]);
+  DONE;
+}
+  [(set_attr "type" "vector")])
+
+(define_insn_and_split "*vcond_mask_<mode><vm>"
   [(set (match_operand:V_VLS 0 "register_operand")
         (if_then_else:V_VLS
           (match_operand:<VM> 3 "register_operand")
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 4bc7ff2c9d8..6abf9d97958 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -463,6 +463,11 @@
   (ior (match_operand 0 "register_operand")
        (match_code "const_vector")))

+(define_predicate "vector_register_or_const_0_operand"
+  (ior (match_operand 0 "register_operand")
+       (and (match_code "const_vector")
+            (match_test "satisfies_constraint_Wc0 (op)"))))
+
 (define_predicate "vector_gs_scale_operand_16"
   (and (match_code "const_int")
        (match_test "INTVAL (op) == 1 || INTVAL (op) == 2")))
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5a2d218d67b..fd6107ccb5c 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -337,6 +337,7 @@ enum insn_type : unsigned int

   /* For vreduce, no mask policy operand. */
   REDUCE_OP = __NORMAL_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
+  REDUCE_OP_M = __MASK_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
   REDUCE_OP_FRM_DYN = REDUCE_OP | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
   REDUCE_OP_M_FRM_DYN
   = __MASK_OP_TA | BINARY_OP_P | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
new file mode 100644
index 00000000000..22a71048684
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+#include <stdint-gcc.h>
+
+#define TEST_TYPE(TYPE1, TYPE2, N)                                             
\
+  __attribute__ ((noipa))                                                      
\
+  TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred)      
\
+  {                                                                            
\
+    TYPE1 sum = 0;                                                             
\
+    for (int i = 0; i < N; i += 1)                                             
\
+      if (pred[i])                                                             
\
+       sum += a[i];                                                           \
+    return sum;                                                                
\
+  }
+
+#define TEST_ALL(TEST)                                                         
\
+  TEST (int16_t, int8_t, 16)                                                   
\
+  TEST (int32_t, int16_t, 8)                                                   
\
+  TEST (int64_t, int32_t, 4)                                                   
\
+  TEST (uint16_t, uint8_t, 16)                                                 
\
+  TEST (uint32_t, uint16_t, 8)                                                 
\
+  TEST (uint64_t, uint32_t, 4)                                                 
\
+  TEST (float, _Float16, 8)                                                    
\
+  TEST (double, float, 4)
+
+TEST_ALL (TEST_TYPE)
+
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
new file mode 100644
index 00000000000..fdb7e5249ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cond/cond_widen_reduc_run-1.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "cond_widen_reduc-1.c"
+
+#define RUN(TYPE1, TYPE2, N)                                                   
\
+  {                                                                            
\
+    TYPE2 a[N];                                                                
\
+    TYPE2 pred[N];                                                             
\
+    TYPE1 r = 0;                                                               
\
+    for (int i = 0; i < N; i++)                                                
\
+      {                                                                        
\
+       a[i] = (i * 0.1) * (i & 1 ? 1 : -1);                                   \
+       pred[i] = i % 3;                                                       \
+       if (pred[i])                                                           \
+         r += a[i];                                                           \
+       asm volatile ("" ::: "memory");                                        \
+      }                                                                        
\
+    if (r != reduc_##TYPE1##_##TYPE2 (a, pred))                                
\
+      __builtin_abort ();                                                      
\
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (RUN)
+  return 0;
+}
--
2.36.3

Reply via email to