This pattern enables the combine pass (or late-combine, depending on the case)
to merge a float_extend'ed vec_duplicate into a minus RTL instruction. Both
minus operands are widened.
Before this patch, we have six instructions, e.g.:
fcvt.d.s fa0,fa0
vsetvli a5,zero,e64,m1,ta,ma
vfmv.v.f v3,fa0
vfwcvt.f.f.v v1,v2
vsetvli zero,zero,e64,m1,ta,ma
vfsub.vv v1,v1,v3
After, we get only one:
vfwsub.vf v1,v2,fa0
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*vfwsub_vf_<mode>): New pattern to
combine float_extend + vec_duplicate + vfsub.vv into vfwsub.vf.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwsub.vf.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
(DEF_VF_BINOP_WIDEN_CASE_0, DEF_VF_BINOP_WIDEN_CASE_1): Swap operands.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: Likewise.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c: New test.
* gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c: New test.
---
gcc/config/riscv/autovec-opt.md | 23 +++++++++++++++++++
.../riscv/rvv/autovec/vx_vf/vf-1-f16.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-1-f32.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-2-f16.c | 3 ++-
.../riscv/rvv/autovec/vx_vf/vf-2-f32.c | 3 ++-
.../riscv/rvv/autovec/vx_vf/vf-3-f16.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-3-f32.c | 2 ++
.../riscv/rvv/autovec/vx_vf/vf-4-f16.c | 1 +
.../riscv/rvv/autovec/vx_vf/vf-4-f32.c | 1 +
.../riscv/rvv/autovec/vx_vf/vf_binop.h | 10 ++++----
.../rvv/autovec/vx_vf/vf_binop_widen_run.h | 2 +-
.../rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c | 20 ++++++++++++++++
.../rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c | 16 +++++++++++++
13 files changed, 79 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c
diff --git gcc/config/riscv/autovec-opt.md gcc/config/riscv/autovec-opt.md
index 5512c46fa8e..02f19bc6a42 100644
--- gcc/config/riscv/autovec-opt.md
+++ gcc/config/riscv/autovec-opt.md
@@ -2215,6 +2215,29 @@ (define_insn_and_split "*vfwadd_wf_<mode>"
[(set_attr "type" "vfwalu")]
)
+;; vfwsub.vf
+(define_insn_and_split "*vfwsub_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (minus:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand")))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_dual_widen_scalar (MINUS,
+ <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+ DONE;
+ }
+ [(set_attr "type" "vfwalu")]
+)
+
;; vfadd.vf
(define_insn_and_split "*vfadd_vf_<mode>"
[(set (match_operand:V_VLSF 0 "register_operand")
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
index fed5d3b6001..20e809010d8 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
@@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP (_Float16), max)
DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul)
DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add)
+DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, -, sub)
DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
@@ -50,4 +51,5 @@ DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add)
/* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwsub.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
index 82d64d11c87..8ecd7d0fa00 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
@@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max)
DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul)
DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add)
+DEF_VF_BINOP_WIDEN_CASE_0 (float, double, -, sub)
DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add)
/* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
@@ -50,4 +51,5 @@ DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add)
/* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
/* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwsub.vf} 1 } } */
/* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
index eef86749c50..8fe361f4f70 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
@@ -24,5 +24,6 @@
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 7 } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 8 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
index c5e93060e02..a1eaaa8b47f 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
@@ -24,5 +24,6 @@
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 7 } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 8 } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
index f0c6594533e..f799437d3ca 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
@@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max,
VF_BINOP_FUNC_BODY_X128)
DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul)
DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add)
+DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, -, sub)
DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
@@ -54,4 +55,5 @@ DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add)
/* { dg-final { scan-assembler {vfmax.vf} } } */
/* { dg-final { scan-assembler {vfwmul.vf} } } */
/* { dg-final { scan-assembler {vfwadd.vf} } } */
+/* { dg-final { scan-assembler {vfwsub.vf} } } */
/* { dg-final { scan-assembler {vfwadd.wf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
index 60617c3ec9b..bb987e1edc0 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
@@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
VF_BINOP_FUNC_BODY_X128)
DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul)
DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add)
+DEF_VF_BINOP_WIDEN_CASE_1 (float, double, -, sub)
DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add)
/* { dg-final { scan-assembler {vfmadd.vf} } } */
@@ -54,4 +55,5 @@ DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add)
/* { dg-final { scan-assembler {vfmax.vf} } } */
/* { dg-final { scan-assembler {vfwmul.vf} } } */
/* { dg-final { scan-assembler {vfwadd.vf} } } */
+/* { dg-final { scan-assembler {vfwsub.vf} } } */
/* { dg-final { scan-assembler {vfwadd.wf} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
index 0650265b6c0..50a4968718b 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
@@ -24,5 +24,6 @@
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
/* { dg-final { scan-assembler {fcvt.s.h} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
index b43699deb83..2e7ef538215 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
@@ -24,5 +24,6 @@
/* { dg-final { scan-assembler-not {vfmax.vf} } } */
/* { dg-final { scan-assembler-not {vfwmul.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwsub.vf} } } */
/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
/* { dg-final { scan-assembler {fcvt.d.s} } } */
diff --git gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
index 2a55c9c6df9..479a6fa7222 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
@@ -37,7 +37,7 @@
unsigned n) \
{
\
for (unsigned i = 0; i < n; i++)
\
- out[i] = (T2) f OP (T2) in[i];
\
+ out[i] = (T2) in[i] OP (T2) f;
\
}
#define DEF_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, OP, NAME)
\
DEF_VF_BINOP_WIDEN_CASE_0 (T1, T2, OP, NAME)
@@ -246,10 +246,10 @@ DEF_MAX_1 (double)
{
\
for (int i = 0; i < n; i++)
\
{
\
- dst[i] = (TYPE2) * a OP (TYPE2) b[i]; \
- dst2[i] = (TYPE2) * a2 OP (TYPE2) b[i]; \
- dst3[i] = (TYPE2) * a2 OP (TYPE2) a[i]; \
- dst4[i] = (TYPE2) * a OP (TYPE2) b2[i]; \
+ dst[i] = (TYPE2) b[i] OP (TYPE2) * a; \
+ dst2[i] = (TYPE2) b[i] OP (TYPE2) * a2; \
+ dst3[i] = (TYPE2) a[i] OP (TYPE2) * a2; \
+ dst4[i] = (TYPE2) b2[i] OP (TYPE2) * a; \
}
\
}
diff --git
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
index 8748cda21aa..1c9dc8c5e7b 100644
--- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
@@ -28,7 +28,7 @@ int main ()
TEST_RUN (T1, T2, NAME, out, in, f, N);
for (int i = 0; i < N; i++)
- assert (out[i] == ((T2) f OP (T2) in[i]));
+ assert (out[i] == ((T2) in[i] OP (T2) f));
return 0;
}
diff --git
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c
new file mode 100644
index 00000000000..6269073b940
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f16.c
@@ -0,0 +1,20 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1 _Float16
+#define T2 float
+#define NAME sub
+#define OP -
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+
+#include "vf_binop_widen_run.h"
diff --git
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c
new file mode 100644
index 00000000000..9e23db1f8af
--- /dev/null
+++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwsub-run-1-f32.c
@@ -0,0 +1,16 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1 float
+#define T2 double
+#define NAME sub
+#define OP -
+
+DEF_VF_BINOP_WIDEN_CASE_0_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_0_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+
+#include "vf_binop_widen_run.h"
--
2.39.5