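This pattern enables the combine pass (or late-combine, depending on the case) to merge a float_extend'ed vec_duplicate into a plus RTL instruction. The other operand of the plus is already a vector in the wide mode, so only the scalar has to be extended; this is the single-widening (.wf) form.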
Before this patch, we have four instructions, e.g.:

  fcvt.d.s        fa0,fa0
  vsetvli         a5,zero,e64,m1,ta,ma
  vfmv.v.f        v2,fa0
  vfadd.vv        v1,v1,v2

After, we get only one:

  vfwadd.wf       v1,v1,fa0

gcc/ChangeLog:

	* config/riscv/autovec-opt.md (*vfwadd_wf_<mode>): New pattern to
	combine float_extend + vec_duplicate + vfadd.vv into vfwadd.wf.
	* config/riscv/vector.md
	(@pred_single_widen_<plus_minus:optab><mode>_scalar): Swap and
	reorder operands to match the RTL emitted by expand.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwadd.wf.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h: Add support for
	single widening variants.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: Add
	support for single widening variants.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c: New test.
	* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c: New test.
--- gcc/config/riscv/autovec-opt.md | 22 ++++++++++++++ gcc/config/riscv/vector.md | 8 ++--- .../riscv/rvv/autovec/vx_vf/vf-1-f16.c | 2 ++ .../riscv/rvv/autovec/vx_vf/vf-1-f32.c | 2 ++ .../riscv/rvv/autovec/vx_vf/vf-2-f16.c | 3 +- .../riscv/rvv/autovec/vx_vf/vf-2-f32.c | 3 +- .../riscv/rvv/autovec/vx_vf/vf-3-f16.c | 2 ++ .../riscv/rvv/autovec/vx_vf/vf-3-f32.c | 2 ++ .../riscv/rvv/autovec/vx_vf/vf-4-f16.c | 1 + .../riscv/rvv/autovec/vx_vf/vf-4-f32.c | 1 + .../riscv/rvv/autovec/vx_vf/vf_binop.h | 30 +++++++++++++++++++ .../rvv/autovec/vx_vf/vf_binop_widen_run.h | 10 +++++-- .../rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c | 21 +++++++++++++ .../rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c | 17 +++++++++++ 14 files changed, 115 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md index dbbade34dce..5512c46fa8e 100644 --- gcc/config/riscv/autovec-opt.md +++ gcc/config/riscv/autovec-opt.md @@ -2193,6 +2193,28 @@ (define_insn_and_split "*vfwadd_vf_<mode>" [(set_attr "type" "vfwalu")] ) +;; vfwadd.wf +(define_insn_and_split "*vfwadd_wf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (plus:VWEXTF + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand"))) + (match_operand:VWEXTF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_single_widen_scalar (PLUS, + <MODE>mode), + riscv_vector::BINARY_OP_FRM_DYN, operands); + + DONE; + } + [(set_attr "type" "vfwalu")] +) + ;; vfadd.vf (define_insn_and_split "*vfadd_vf_<mode>" [(set (match_operand:V_VLSF 0 "register_operand") diff --git gcc/config/riscv/vector.md gcc/config/riscv/vector.md index c6c37dff994..3cb87bf4eae 100644 --- gcc/config/riscv/vector.md +++ 
gcc/config/riscv/vector.md @@ -7325,10 +7325,10 @@ (define_insn "@pred_single_widen_<plus_minus:optab><mode>_scalar" (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (plus_minus:VWEXTF - (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr") - (float_extend:VWEXTF - (vec_duplicate:<V_DOUBLE_TRUNC> - (match_operand:<VSUBEL> 4 "register_operand" " f, f, f, f")))) + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 4 "register_operand" " f, f, f, f"))) + (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr")) (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vfw<insn>.wf\t%0,%3,%4%p1" diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c index 696b7508065..fed5d3b6001 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c @@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max) DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max) DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul) DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add) +DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -49,3 +50,4 @@ DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add) /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */ /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c index 7f746d8eb6a..82d64d11c87 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c @@ 
-27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max) DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max) DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul) DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add) +DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add) /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */ @@ -49,3 +50,4 @@ DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add) /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */ /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */ /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */ +/* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c index 0ecfb6209e6..eef86749c50 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c @@ -24,4 +24,5 @@ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ -/* { dg-final { scan-assembler-times {fcvt.s.h} 6 } } */ +/* { dg-final { scan-assembler-not {vfwadd.wf} } } */ +/* { dg-final { scan-assembler-times {fcvt.s.h} 7 } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c index d48777cf444..c5e93060e02 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c @@ -24,4 +24,5 @@ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ -/* { dg-final { scan-assembler-times {fcvt.d.s} 6 } } */ +/* { dg-final { scan-assembler-not {vfwadd.wf} } } */ +/* { dg-final { scan-assembler-times {fcvt.d.s} 7 } } 
*/ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c index 8e670a3c13a..f0c6594533e 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c @@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max, VF_BINOP_FUNC_BODY_X128) DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul) DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add) +DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -53,3 +54,4 @@ DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add) /* { dg-final { scan-assembler {vfmax.vf} } } */ /* { dg-final { scan-assembler {vfwmul.vf} } } */ /* { dg-final { scan-assembler {vfwadd.vf} } } */ +/* { dg-final { scan-assembler {vfwadd.wf} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c index 3c2e7f510b0..60617c3ec9b 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c @@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max, VF_BINOP_FUNC_BODY_X128) DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul) DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add) +DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add) /* { dg-final { scan-assembler {vfmadd.vf} } } */ /* { dg-final { scan-assembler {vfmsub.vf} } } */ @@ -53,3 +54,4 @@ DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add) /* { dg-final { scan-assembler {vfmax.vf} } } */ /* { dg-final { scan-assembler {vfwmul.vf} } } */ /* { dg-final { scan-assembler {vfwadd.vf} } } */ +/* { dg-final { scan-assembler {vfwadd.wf} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c index 
069e82a27a5..0650265b6c0 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c @@ -24,4 +24,5 @@ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ +/* { dg-final { scan-assembler-not {vfwadd.wf} } } */ /* { dg-final { scan-assembler {fcvt.s.h} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c index 62e7aef82dd..b43699deb83 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c @@ -24,4 +24,5 @@ /* { dg-final { scan-assembler-not {vfmax.vf} } } */ /* { dg-final { scan-assembler-not {vfwmul.vf} } } */ /* { dg-final { scan-assembler-not {vfwadd.vf} } } */ +/* { dg-final { scan-assembler-not {vfwadd.wf} } } */ /* { dg-final { scan-assembler {fcvt.d.s} } } */ diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h index 1d97ea4f482..2a55c9c6df9 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h @@ -253,4 +253,34 @@ DEF_MAX_1 (double) } \ } +#define DEF_VF_BINOP_WIDEN_CASE_2(T1, T2, OP, NAME) \ + void test_vf_binop_widen_##NAME##_##T1##_case_2 (T2 *restrict out, \ + T2 *restrict in, T1 f, \ + unsigned n) \ + { \ + for (unsigned i = 0; i < n; i++) \ + out[i] = (T2) f OP in[i]; \ + } +#define DEF_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, OP, NAME) \ + DEF_VF_BINOP_WIDEN_CASE_2 (T1, T2, OP, NAME) +#define RUN_VF_BINOP_WIDEN_CASE_2(T1, T2, NAME, out, in, f, n) \ + test_vf_binop_widen_##NAME##_##T1##_case_2 (out, in, f, n) +#define RUN_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, NAME, out, in, f, n) \ + RUN_VF_BINOP_WIDEN_CASE_2 (T1, T2, NAME, out, in, f, n) + +#define 
DEF_VF_BINOP_WIDEN_CASE_3(TYPE1, TYPE2, OP, NAME) \ + void test_vf_binop_widen_##NAME##_##TYPE1##_##TYPE2##_case_3 ( \ + TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3, \ + TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE2 *__restrict b, \ + TYPE1 *__restrict a2, TYPE2 *__restrict b2, int n) \ + { \ + for (int i = 0; i < n; i++) \ + { \ + dst[i] = (TYPE2) * a OP b[i]; \ + dst2[i] = (TYPE2) * a2 OP b[i]; \ + dst3[i] = (TYPE2) * a2 OP b2[i]; \ + dst4[i] = (TYPE2) * a OP b2[i]; \ + } \ + } + #endif diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h index 929b2dec85d..8748cda21aa 100644 --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h @@ -5,19 +5,23 @@ #define N 512 +#ifdef SINGLE +#define TIN T2 +#else +#define TIN T1 +#endif + int main () { T1 f; - T1 in[N]; + TIN in[N]; T2 out[N]; - T2 out2[N]; f = LIMIT % 8723; for (int i = 0; i < N; i++) { in[i] = LIMIT + i & 1964; out[i] = LIMIT + i & 628; - out2[i] = LIMIT + i & 628; asm volatile ("" ::: "memory"); } diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c new file mode 100644 index 00000000000..d5400cc4a67 --- /dev/null +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c @@ -0,0 +1,21 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-require-effective-target riscv_zvfh_ok } */ +/* { dg-add-options "riscv_v" } */ +/* { dg-add-options "riscv_zvfh" } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_binop.h" + +#define T1 _Float16 +#define T2 float +#define NAME add +#define OP + + +DEF_VF_BINOP_WIDEN_CASE_2_WRAP (T1, T2, OP, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) 
RUN_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -32768 +#define SINGLE + +#include "vf_binop_widen_run.h" diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c new file mode 100644 index 00000000000..630a153ff14 --- /dev/null +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "--param=fpr2vr-cost=0" } */ + +#include "vf_binop.h" + +#define T1 float +#define T2 double +#define NAME add +#define OP + + +DEF_VF_BINOP_WIDEN_CASE_2_WRAP (T1, T2, OP, NAME) + +#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, NAME, out, in, f, n) +#define LIMIT -2147483648 +#define SINGLE + +#include "vf_binop_widen_run.h" -- 2.39.5