For 'wv' instructions, e.g. vwadd.wv vd,vs2,vs1.
vs2 has same EEW as vd.
vs1 has a smaller EEW than vd.
So vs2 can overlap vd, but vs1 can only overlap the highest-numbered part of
the vd register group, and only when the LMUL of vs1 is at least 1.
We already support that overlap for vs1 LMUL >= 1.
But I forgot the vs1 LMUL < 1 case: there, vs2 can still overlap vd even
though vs1 cannot overlap vd at all.
Consider the reduction auto-vectorization:
int64_t
reduc_plus_int (int *__restrict a, int n)
{
int64_t r = 0;
for (int i = 0; i < n; ++i)
r += a[i];
return r;
}
Before this patch:
reduc_plus_int:
ble a1,zero,.L4
vsetvli a5,zero,e64,m1,ta,ma
vmv.v.i v1,0
.L3:
vsetvli a5,a1,e32,mf2,tu,ma
slli a4,a5,2
sub a1,a1,a5
vle32.v v2,0(a0)
vmv1r.v v3,v1 ----> This should be removed.
add a0,a0,a4
vwadd.wv v1,v3,v2 ----> vs2 should be v1
bne a1,zero,.L3
li a5,0
vsetivli zero,1,e64,m1,ta,ma
vmv.s.x v2,a5
vsetvli a5,zero,e64,m1,ta,ma
vredsum.vs v1,v1,v2
vmv.x.s a0,v1
ret
.L4:
li a0,0
ret
After this patch:
reduc_plus_int:
ble a1,zero,.L4
vsetvli a5,zero,e64,m1,ta,ma
vmv.v.i v1,0
.L3:
vsetvli a5,a1,e32,mf2,tu,ma
slli a4,a5,2
sub a1,a1,a5
vle32.v v2,0(a0)
add a0,a0,a4
vwadd.wv v1,v1,v2
bne a1,zero,.L3
li a5,0
vsetivli zero,1,e64,m1,ta,ma
vmv.s.x v2,a5
vsetvli a5,zero,e64,m1,ta,ma
vredsum.vs v1,v1,v2
vmv.x.s a0,v1
ret
.L4:
li a0,0
ret
PR target/112432
gcc/ChangeLog:
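* config/riscv/riscv.md (group_overlap): Add W0.
(group_overlap_valid): Disable W0 alternatives when the destination
register alignment is greater than 1.
* config/riscv/vector.md (@pred_single_widen_sub<any_extend:su><mode>):
Add W0 alternatives that tie operand 3 (vs2) to operand 0 (vd).
(@pred_single_widen_add<any_extend:su><mode>): Ditto.
(@pred_single_widen_add<mode>): Ditto.
(@pred_single_widen_sub<mode>): Ditto.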
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/pr112432-42.c: New test.
---
gcc/config/riscv/riscv.md | 14 +++-
gcc/config/riscv/vector.md | 84 +++++++++----------
.../gcc.target/riscv/rvv/base/pr112432-42.c | 30 +++++++
3 files changed, 82 insertions(+), 46 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index eed997116b0..ee8b71c22aa 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -503,7 +503,7 @@
;; Widening instructions have group-overlap constraints. Those are only
;; valid for certain register-group sizes. This attribute marks the
;; alternatives not matching the required register-group size as disabled.
-(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87"
+(define_attr "group_overlap" "none,W21,W42,W84,W43,W86,W87,W0"
(const_string "none"))
(define_attr "group_overlap_valid" "no,yes"
@@ -524,9 +524,9 @@
;; According to RVV ISA:
;; The destination EEW is greater than the source EEW, the source
EMUL is at least 1,
- ;; and the overlap is in the highest-numbered part of the destination
register group
- ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0,
v2, or v4 is not).
- ;; So the source operand should have LMUL >= 1.
+ ;; and the overlap is in the highest-numbered part of the destination
register group
+ ;; (e.g., when LMUL=8, vzext.vf4 v0, v6 is legal, but a source of v0,
v2, or v4 is not).
+ ;; So the source operand should have LMUL >= 1.
(and (eq_attr "group_overlap" "W43")
(match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0]))
!= 4
&& riscv_get_v_regno_alignment (GET_MODE
(operands[3])) >= 1"))
@@ -536,6 +536,12 @@
(match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0]))
!= 8
&& riscv_get_v_regno_alignment (GET_MODE
(operands[3])) >= 1"))
(const_string "no")
+
+ ;; W21 supports highest-number overlap for source LMUL = 1.
+ ;; For 'wv' variant, we can also allow wide source operand overlaps
dest operand.
+ (and (eq_attr "group_overlap" "W0")
+ (match_test "riscv_get_v_regno_alignment (GET_MODE (operands[0]))
> 1"))
+ (const_string "no")
]
(const_string "yes")))
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 7646615b12a..d475e14a823 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -3776,48 +3776,48 @@
(set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
(define_insn "@pred_single_widen_sub<any_extend:su><mode>"
- [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd,
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+ [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr,
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, &vr, ?&vr")
(if_then_else:VWEXTI
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(minus:VWEXTI
- (match_operand:VWEXTI 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr")
+ (match_operand:VWEXTI 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr")
(any_extend:VWEXTI
- (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")))
- (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))]
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr")))
+ (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vwsub<any_extend:u>.wv\t%0,%3,%4%p1"
[(set_attr "type" "viwalu")
(set_attr "mode" "<V_DOUBLE_TRUNC>")
- (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+ (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
(define_insn "@pred_single_widen_add<any_extend:su><mode>"
- [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd,
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+ [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr,
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, &vr, ?&vr")
(if_then_else:VWEXTI
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(plus:VWEXTI
(any_extend:VWEXTI
- (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))
- (match_operand:VWEXTI 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr"))
- (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))]
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr"))
+ (match_operand:VWEXTI 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr"))
+ (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vwadd<any_extend:u>.wv\t%0,%3,%4%p1"
[(set_attr "type" "viwalu")
(set_attr "mode" "<V_DOUBLE_TRUNC>")
- (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+ (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
(define_insn
"@pred_single_widen_<plus_minus:optab><any_extend:su><mode>_scalar"
[(set (match_operand:VWEXTI 0 "register_operand" "=vr,
vr")
@@ -7056,56 +7056,56 @@
(set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
(define_insn "@pred_single_widen_add<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd,
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+ [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr,
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, &vr, ?&vr")
(if_then_else:VWEXTF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 9 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 9 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
(plus:VWEXTF
(float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))
- (match_operand:VWEXTF 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr"))
- (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))]
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr"))
+ (match_operand:VWEXTF 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr"))
+ (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vfwadd.wv\t%0,%3,%4%p1"
[(set_attr "type" "vfwalu")
(set_attr "mode" "<V_DOUBLE_TRUNC>")
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))
- (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+ (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
(define_insn "@pred_single_widen_sub<mode>"
- [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd,
vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr")
+ [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr,
vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, &vr, &vr, &vr, ?&vr")
(if_then_else:VWEXTF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1")
- (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
- (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
- (match_operand 9 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm,Wc1,
vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1,vmWc1,vmWc1")
+ (match_operand 5 "vector_length_operand" " rK, rK,
rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK")
+ (match_operand 6 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
+ (match_operand 9 "const_int_operand" " i, i,
i, i, i, i, i, i, i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
(minus:VWEXTF
- (match_operand:VWEXTF 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr")
+ (match_operand:VWEXTF 3 "register_operand" " vr, vr,
vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, 0, 0, vr, vr")
(float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")))
- (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))]
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr, vr, vr")))
+ (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu,
0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vfwsub.wv\t%0,%3,%4%p1"
[(set_attr "type" "vfwalu")
(set_attr "mode" "<V_DOUBLE_TRUNC>")
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))
- (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")])
+ (set_attr "group_overlap"
"W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,W0,W0,none,none")])
(define_insn "@pred_single_widen_<plus_minus:optab><mode>_scalar"
[(set (match_operand:VWEXTF 0 "register_operand" "=vr,
vr")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c
new file mode 100644
index 00000000000..1ee5b20a899
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112432-42.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ffast-math" } */
+
+#include <stdint-gcc.h>
+
+int64_t
+reduc_plus_int (int *__restrict a, int n)
+{
+ int64_t r = 0;
+ for (int i = 0; i < n; ++i)
+ r += a[i];
+ return r;
+}
+
+double
+reduc_plus_float (float *__restrict a, int n)
+{
+ double r = 0;
+ for (int i = 0; i < n; ++i)
+ r += a[i];
+ return r;
+}
+
+/* { dg-final { scan-assembler-not {vmv1r} } } */
+/* { dg-final { scan-assembler-not {vmv2r} } } */
+/* { dg-final { scan-assembler-not {vmv4r} } } */
+/* { dg-final { scan-assembler-not {vmv8r} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-assembler-times {vwadd\.wv} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd\.wv} 1 } } */
--
2.36.3