From: Pan Li <[email protected]>
This patch combines vec_duplicate + vadd.vv into vadd.vx, as shown in
the example code below. Whether the related pattern applies depends
on the cost of vec_duplicate from GR2VR:
* The pattern matching will be inactive if GR2VR cost is zero.
* The cost of GR2VR will be added to the total cost of the pattern, and
late-combine will decide whether to perform the replacement based on
the cost value.
Assume we have the example code below; the GR2VR cost is 2 by default.
#define DEF_VX_BINARY(T, OP) \
void \
test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \
{ \
for (unsigned i = 0; i < n; i++) \
out[i] = in[i] OP x; \
}
DEF_VX_BINARY(int32_t, +)
Before this patch:
10 │ test_binary_vx_add:
11 │ beq a3,zero,.L8
12 │ vsetvli a5,zero,e32,m1,ta,ma // eliminated if GR2VR cost non-zero
13 │ vmv.v.x v2,a2 // Ditto.
14 │ slli a3,a3,32
15 │ srli a3,a3,32
16 │ .L3:
17 │ vsetvli a5,a3,e32,m1,ta,ma
18 │ vle32.v v1,0(a1)
19 │ slli a4,a5,2
20 │ sub a3,a3,a5
21 │ add a1,a1,a4
22 │ vadd.vv v1,v2,v1
23 │ vse32.v v1,0(a0)
24 │ add a0,a0,a4
25 │ bne a3,zero,.L3
After this patch:
10 │ test_binary_vx_add:
11 │ beq a3,zero,.L8
12 │ slli a3,a3,32
13 │ srli a3,a3,32
14 │ .L3:
15 │ vsetvli a5,a3,e32,m1,ta,ma
16 │ vle32.v v1,0(a1)
17 │ slli a4,a5,2
18 │ sub a3,a3,a5
19 │ add a1,a1,a4
20 │ vadd.vx v1,v1,a2
21 │ vse32.v v1,0(a0)
22 │ add a0,a0,a4
23 │ bne a3,zero,.L3
The following test suites passed for this patch:
* The rv64gcv full regression test.
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*<optab>_vx_<mode>): Add new
combine to convert vec_duplicate + vadd.vv to vadd.vx based on
GR2VR cost.
* config/riscv/riscv.cc (riscv_rtx_costs): Extract vector
cost into a separate function.
(riscv_vector_rtx_costs): Add new function to take care of the
cost of vector rtx, defaulting to 1 and adding the GR2VR cost
for vec_duplicate rtx.
* config/riscv/vector-iterators.md: Add new iterator for vx.
Signed-off-by: Pan Li <[email protected]>
---
gcc/config/riscv/autovec-opt.md | 22 ++++++++++++++++++++++
gcc/config/riscv/riscv.cc | 26 ++++++++++++++++++++------
gcc/config/riscv/vector-iterators.md | 4 ++++
3 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0c3b0cc7e05..ab45fe2511b 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1673,3 +1673,25 @@ (define_insn_and_split "*vandn_<mode>"
DONE;
}
[(set_attr "type" "vandn")])
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vx
+;; Include
+;; - vadd.vx
+;; =============================================================================
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (any_int_binop_no_shift_vx:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))]
+ "TARGET_VECTOR && can_create_pseudo_p () && get_vector_costs
()->regmove->GR2VR != 0"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[2], operands[1]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (<CODE>, <MODE>mode),
+ riscv_vector::BINARY_OP, ops);
+ }
+ [(set_attr "type" "vialu")])
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 38f3ae7cd84..9bd0dbcf5f6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3856,16 +3856,30 @@ riscv_extend_cost (rtx op, bool unsigned_p)
#define SINGLE_SHIFT_COST 1
static bool
-riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno
ATTRIBUTE_UNUSED,
- int *total, bool speed)
+riscv_vector_rtx_costs (rtx x, machine_mode mode, int *total)
{
+ gcc_assert (riscv_v_ext_mode_p (mode));
+
/* TODO: We set RVV instruction cost as 1 by default.
Cost Model need to be well analyzed and supported in the future. */
+ int cost_val = 1;
+ enum rtx_code rcode = GET_CODE (x);
+
+ /* E.g. (vec_duplicate:RVVM1DI (reg/v:DI 143 [ x ])).  */
+ if (rcode == VEC_DUPLICATE && SCALAR_INT_MODE_P (GET_MODE (XEXP (x, 0))))
+ cost_val += get_vector_costs ()->regmove->GR2VR;
+
+ *total = COSTS_N_INSNS (cost_val);
+
+ return true;
+}
+
+static bool
+riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno
ATTRIBUTE_UNUSED,
+ int *total, bool speed)
+{
if (riscv_v_ext_mode_p (mode))
- {
- *total = COSTS_N_INSNS (1);
- return true;
- }
+ return riscv_vector_rtx_costs (x, mode, total);
bool float_mode_p = FLOAT_MODE_P (mode);
int cost;
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index f8da71b1d65..b5fc833f1d5 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4052,6 +4052,10 @@ (define_code_iterator any_int_binop_no_shift
[plus minus and ior xor smax umax smin umin mult div udiv mod umod
])
+(define_code_iterator any_int_binop_no_shift_vx
+ [plus
+])
+
(define_code_iterator any_sat_int_binop [ss_plus ss_minus us_plus us_minus])
(define_code_iterator sat_int_plus_binop [ss_plus us_plus])
(define_code_iterator sat_int_minus_binop [ss_minus us_minus])
--
2.43.0