[PATCH] RISC-V: Support one more overlap for wv instructions

Juzhe-Zhong Mon, 18 Dec 2023 02:59:52 -0800

For 'wv' instructions, e.g. vwadd.wv vd,vs2,vs1.

vs2 has the same EEW as vd.
vs1 has a smaller EEW than vd.


So, vs2 can overlap with vd, but vs1 can only overlap the highest-numbered
part of the vd register group, and only when the LMUL of vs1 is at least 1.

We already support the overlap for vs1 LMUL >= 1.
But I forgot the case of vs1 LMUL < 1: there, vs2 can still overlap vd even
though vs1 cannot overlap vd at all.

Consider the reduction auto-vectorization:

int64_t
reduc_plus_int (int *__restrict a, int n)
{
  int64_t r = 0;
  for (int i = 0; i < n; ++i)
    r += a[i];
  return r;
}

Before this patch:

reduc_plus_int:
        ble     a1,zero,.L4
        vsetvli a5,zero,e64,m1,ta,ma
        vmv.v.i v1,0
.L3:
        vsetvli a5,a1,e32,mf2,tu,ma
        slli    a4,a5,2
        sub     a1,a1,a5
        vle32.v v2,0(a0)
        vmv1r.v v3,v1                  ---->  This should be removed.
        add     a0,a0,a4
        vwadd.wv        v1,v3,v2       ---->  vs2 should be v1
        bne     a1,zero,.L3
        li      a5,0
        vsetivli        zero,1,e64,m1,ta,ma
        vmv.s.x v2,a5
        vsetvli a5,zero,e64,m1,ta,ma
        vredsum.vs      v1,v1,v2
        vmv.x.s a0,v1
        ret
.L4:
        li      a0,0
        ret

After this patch:

reduc_plus_int:
        ble     a1,zero,.L4
        vsetvli a5,zero,e64,m1,ta,ma
        vmv.v.i v1,0
.L3:
        vsetvli a5,a1,e32,mf2,tu,ma
        slli    a4,a5,2
        sub     a1,a1,a5
        vle32.v v2,0(a0)
        add     a0,a0,a4
        vwadd.wv        v1,v1,v2
        bne     a1,zero,.L3
        li      a5,0
        vsetivli        zero,1,e64,m1,ta,ma
        vmv.s.x v2,a5
        vsetvli a5,zero,e64,m1,ta,ma
        vredsum.vs      v1,v1,v2
        vmv.x.s a0,v1
        ret
.L4:
        li      a0,0
        ret

        PR target/112432

gcc/ChangeLog:

        * config/riscv/riscv.md (none,W21,W42,W84,W43,W86,W87): Add W0.
        (none,W21,W42,W84,W43,W86,W87,W0): Ditto.
        * config/riscv/vector.md: Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/base/pr112432-42.c: New test.

---
 gcc/config/riscv/riscv.md                     | 14 +++-
 gcc/config/riscv/vector.md                    | 84 +++++++++----------
 .../gcc.target/riscv/rvv/base/pr112432-42.c   | 30 +++++++
 3 files changed, 82 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index eed997116b0..ee8b71c22aa 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -503,7 +503,7 @@
 ;; Widening instructions have group-overlap constraints.  Those are only
 ;; valid for certain register-group sizes.  This attribute marks the
 ;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
   (const_string "none"))
 
 (define_attr "group_overlap_valid" "no,yes"
@@ -524,9 +524,9 @@
 
          ;; According to RVV ISA:
          ;; The destination EEW is greater than the source EEW, the source 
EMUL is at least 1,
-        ;; and the overlap is in the highest-numbered part of the destination 
register group
-        ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
-        ;; So the source operand should have LMUL >= 1.
+         ;; and the overlap is in the highest-numbered part of the destination 
register group
+         ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0, 
v2, or v4 is not).
+         ;; So the source operand should have LMUL >= 1.
          (and (eq_attr "group_overlap" "W43")
              (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 4
                           && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
@@ -536,6 +536,12 @@
              (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
!= 8
                           && riscv_get_v_regno_alignment (GET_MODE 
(operands[3])) >= 1"))
         (const_string "no")
+
+         ;; W21 supports highest-number overlap for source LMUL = 1.
+         ;; For 'wv' variant, we can also allow wide source operand overlaps 
dest operand.
+         (and (eq_attr "group_overlap" "W0")
+             (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0])) 
> 1"))
+        (const_string "no")
         ]
        (const_string "yes")))
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 7646615b12a..d475e14a823 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3776,48 +3776,48 @@
    (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
 
 (define_insn "@pred_single_widen_sub<any_extend:su><mode>"
-  [(set (match_operand:VWEXTI 0 "register_operand"             "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"                 "=vd, vr, 
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr,  &vr,  &vr,  &vr, ?&vr")
        (if_then_else:VWEXTI
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK,   rK,   rK")
+            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (minus:VWEXTI
-           (match_operand:VWEXTI 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,   vr,   vr")
+           (match_operand:VWEXTI 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,    0,    0,   vr,   vr")
            (any_extend:VWEXTI
-             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr")))
-         (match_operand:VWEXTI 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0")))]
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr,   vr,   vr")))
+         (match_operand:VWEXTI 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "vwsub<any_extend:u>.wv\t%0,%3,%4%p1"
   [(set_attr "type" "viwalu")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
 
 (define_insn "@pred_single_widen_add<any_extend:su><mode>"
-  [(set (match_operand:VWEXTI 0 "register_operand"             "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTI 0 "register_operand"                 "=vd, vr, 
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr,  &vr,  &vr,  &vr, ?&vr")
        (if_then_else:VWEXTI
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK,   rK,   rK")
+            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (plus:VWEXTI
            (any_extend:VWEXTI
-             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr"))
-           (match_operand:VWEXTI 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,   vr,   vr"))
-         (match_operand:VWEXTI 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0")))]
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr,   vr,   vr"))
+           (match_operand:VWEXTI 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,    0,    0,   vr,   vr"))
+         (match_operand:VWEXTI 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "vwadd<any_extend:u>.wv\t%0,%3,%4%p1"
   [(set_attr "type" "viwalu")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
 
 (define_insn 
"@pred_single_widen_<plus_minus:optab><any_extend:su><mode>_scalar"
   [(set (match_operand:VWEXTI 0 "register_operand"                   "=vr,   
vr")
@@ -7056,56 +7056,56 @@
    (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
 
 (define_insn "@pred_single_widen_add<mode>"
-  [(set (match_operand:VWEXTF 0 "register_operand"             "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTF 0 "register_operand"                 "=vd, vr, 
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr,  &vr,  &vr,  &vr, ?&vr")
        (if_then_else:VWEXTF
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 9 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK,   rK,   rK")
+            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 9 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)
             (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
          (plus:VWEXTF
            (float_extend:VWEXTF
-             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr"))
-           (match_operand:VWEXTF 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,   vr,   vr"))
-         (match_operand:VWEXTF 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0")))]
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr,   vr,   vr"))
+           (match_operand:VWEXTF 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,    0,    0,   vr,   vr"))
+         (match_operand:VWEXTF 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfwadd.wv\t%0,%3,%4%p1"
   [(set_attr "type" "vfwalu")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
    (set (attr "frm_mode")
        (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
 
 (define_insn "@pred_single_widen_sub<mode>"
-  [(set (match_operand:VWEXTF 0 "register_operand"             "=vd, vr, vd, 
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+  [(set (match_operand:VWEXTF 0 "register_operand"                 "=vd, vr, 
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr,  &vr,  &vr,  &vr, ?&vr")
        (if_then_else:VWEXTF
          (unspec:<VM>
-           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
-            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK")
-            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
-            (match_operand 9 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i")
+           [(match_operand:<VM> 1 "vector_mask_operand"           " vm,Wc1, 
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+            (match_operand 5 "vector_length_operand"              " rK, rK, 
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK,   rK,   rK,   rK,   rK")
+            (match_operand 6 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 7 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 8 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
+            (match_operand 9 "const_int_operand"                  "  i,  i,  
i,  i,  i,  i,  i,  i,  i,  i,  i,  i,    i,    i,    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)
             (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
          (minus:VWEXTF
-           (match_operand:VWEXTF 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,   vr,   vr")
+           (match_operand:VWEXTF 3 "register_operand"             " vr, vr, 
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr,    0,    0,   vr,   vr")
            (float_extend:VWEXTF
-             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr")))
-         (match_operand:VWEXTF 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0")))]
+             (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,   vr,   vr,   vr,   vr")))
+         (match_operand:VWEXTF 2 "vector_merge_operand"           " vu, vu,  
0,  0, vu, vu,  0,  0, vu, vu,  0,  0,   vu,    0,   vu,    0")))]
   "TARGET_VECTOR"
   "vfwsub.wv\t%0,%3,%4%p1"
   [(set_attr "type" "vfwalu")
    (set_attr "mode" "<V_DOUBLE_TRUNC>")
    (set (attr "frm_mode")
        (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))
-   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+   (set_attr "group_overlap" 
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
 
 (define_insn "@pred_single_widen_<plus_minus:optab><mode>_scalar"
   [(set (match_operand:VWEXTF 0 "register_operand"                   "=vr,   
vr")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c
new file mode 100644
index 00000000000..1ee5b20a899
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */
+
+#include <stdint-gcc.h>
+
+int64_t
+reduc_plus_int (int *__restrict a, int n)
+{
+  int64_t r = 0;
+  for (int i = 0; i < n; ++i)
+    r += a[i];
+  return r;
+}
+
+double
+reduc_plus_float (float *__restrict a, int n)
+{
+  double r = 0;
+  for (int i = 0; i < n; ++i)
+    r += a[i];
+  return r;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */
-- 
2.36.3

[PATCH] RISC-V: Support one more overlap for wv instructions

Reply via email to