https://gcc.gnu.org/g:3b33628d7ef666d92f5edbd72b9a697d293ed4a2

commit 3b33628d7ef666d92f5edbd72b9a697d293ed4a2
Author: Paul-Antoine Arras <[email protected]>
Date:   Tue Sep 9 11:43:44 2025 +0200

    RISC-V: Add pattern for vector-scalar single widening floating-point add
    
    This pattern enables the combine pass (or late-combine, depending on the case)
    to merge a float_extend'ed vec_duplicate into a plus RTL instruction. The other
    plus operand is already wide.
    
    Before this patch, we have four instructions, e.g.:
      fcvt.d.s        fa0,fa0
      vsetvli         a5,zero,e64,m1,ta,ma
      vfmv.v.f        v2,fa0
      vfadd.vv        v1,v1,v2
    
    After, we get only one:
      vfwadd.wf       v1,v1,fa0
    
    gcc/ChangeLog:
    
            * config/riscv/autovec-opt.md (*vfwadd_wf_<mode>): New pattern to
            combine float_extend + vec_duplicate + vfadd.vv into vfwadd.wf.
            * config/riscv/vector.md
            (@pred_single_widen_<plus_minus:optab><mode>_scalar): Swap and reorder
            operands to match the RTL emitted by expand.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c: Add vfwadd.wf.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c: Likewise.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h: Add support for single
            widening variants.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h: Add support
            for single widening variants.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c: New test.
            * gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c: New test.
    
    (cherry picked from commit 01827a3f65931df0c54baba1f7c0d68052747a6d)

Diff:
---
 gcc/config/riscv/autovec-opt.md                    | 22 ++++++++++++++++
 gcc/config/riscv/vector.md                         |  8 +++---
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c  |  2 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c  |  2 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c  |  3 ++-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c  |  3 ++-
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c  |  2 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c  |  2 ++
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c  |  1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c  |  1 +
 .../gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h  | 30 ++++++++++++++++++++++
 .../riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h   | 10 +++++---
 .../riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c  | 21 +++++++++++++++
 .../riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c  | 17 ++++++++++++
 14 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index dbbade34dce3..5512c46fa8e5 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -2193,6 +2193,28 @@
   [(set_attr "type" "vfwalu")]
 )
 
+;; vfwadd.wf
+(define_insn_and_split "*vfwadd_wf_<mode>"
+  [(set (match_operand:VWEXTF 0 "register_operand")
+    (plus:VWEXTF
+      (vec_duplicate:VWEXTF
+       (float_extend:<VEL>
+         (match_operand:<VSUBEL> 2 "register_operand")))
+      (match_operand:VWEXTF 1 "register_operand")))]
+  "TARGET_VECTOR && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_single_widen_scalar (PLUS,
+                                                                   <MODE>mode),
+                                  riscv_vector::BINARY_OP_FRM_DYN, operands);
+
+    DONE;
+  }
+  [(set_attr "type" "vfwalu")]
+)
+
 ;; vfadd.vf
 (define_insn_and_split "*vfadd_vf_<mode>"
   [(set (match_operand:V_VLSF 0 "register_operand")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index c6c37dff9945..3cb87bf4eae9 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -7325,10 +7325,10 @@
             (reg:SI VTYPE_REGNUM)
             (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
          (plus_minus:VWEXTF
-           (match_operand:VWEXTF 3 "register_operand"            " vr, vr, vr, vr")
-           (float_extend:VWEXTF
-             (vec_duplicate:<V_DOUBLE_TRUNC>
-               (match_operand:<VSUBEL> 4 "register_operand"      "  f,  f,  f,  f"))))
+           (vec_duplicate:VWEXTF
+             (float_extend:<VEL>
+               (match_operand:<VSUBEL> 4 "register_operand"      "  f,  f,  f,  f")))
+           (match_operand:VWEXTF 3 "register_operand"            " vr, vr, vr, vr"))
          (match_operand:VWEXTF 2 "vector_merge_operand"          " vu,  0, vu,  0")))]
   "TARGET_VECTOR"
   "vfw<insn>.wf\t%0,%3,%4%p1"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
index 696b75080659..fed5d3b60013 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f16.c
@@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_0_WRAP 
(_Float16), max)
 DEF_VF_BINOP_CASE_2_WRAP (_Float16, MAX_FUNC_1_WRAP (_Float16), max)
 DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add)
+DEF_VF_BINOP_WIDEN_CASE_2 (_Float16, float, +, add)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -49,3 +50,4 @@ DEF_VF_BINOP_WIDEN_CASE_0 (_Float16, float, +, add)
 /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
 /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
index 7f746d8eb6aa..82d64d11c87c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-1-f32.c
@@ -27,6 +27,7 @@ DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_0_WRAP (float), max)
 DEF_VF_BINOP_CASE_2_WRAP (float, MAX_FUNC_1_WRAP (float), max)
 DEF_VF_BINOP_WIDEN_CASE_0 (float, double, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add)
+DEF_VF_BINOP_WIDEN_CASE_2 (float, double, +, add)
 
 /* { dg-final { scan-assembler-times {vfmadd.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfmsub.vf} 1 } } */
@@ -49,3 +50,4 @@ DEF_VF_BINOP_WIDEN_CASE_0 (float, double, +, add)
 /* { dg-final { scan-assembler-times {vfmax.vf} 2 } } */
 /* { dg-final { scan-assembler-times {vfwmul.vf} 1 } } */
 /* { dg-final { scan-assembler-times {vfwadd.vf} 1 } } */
+/* { dg-final { scan-assembler-times {vfwadd.wf} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
index 0ecfb6209e62..eef86749c505 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f16.c
@@ -24,4 +24,5 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.s.h} 6 } } */
+/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
+/* { dg-final { scan-assembler-times {fcvt.s.h} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
index d48777cf4445..c5e93060e027 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-2-f32.c
@@ -24,4 +24,5 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
-/* { dg-final { scan-assembler-times {fcvt.d.s} 6 } } */
+/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
+/* { dg-final { scan-assembler-times {fcvt.d.s} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
index 8e670a3c13ac..f0c6594533ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f16.c
@@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (_Float16, MAX_FUNC_1_WRAP 
(_Float16), max,
                          VF_BINOP_FUNC_BODY_X128)
 DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add)
+DEF_VF_BINOP_WIDEN_CASE_3 (_Float16, float, +, add)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
 /* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -53,3 +54,4 @@ DEF_VF_BINOP_WIDEN_CASE_1 (_Float16, float, +, add)
 /* { dg-final { scan-assembler {vfmax.vf} } } */
 /* { dg-final { scan-assembler {vfwmul.vf} } } */
 /* { dg-final { scan-assembler {vfwadd.vf} } } */
+/* { dg-final { scan-assembler {vfwadd.wf} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
index 3c2e7f510b02..60617c3ec9b6 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-3-f32.c
@@ -31,6 +31,7 @@ DEF_VF_BINOP_CASE_3_WRAP (float, MAX_FUNC_1_WRAP (float), max,
                          VF_BINOP_FUNC_BODY_X128)
 DEF_VF_BINOP_WIDEN_CASE_1 (float, double, *, mul)
 DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add)
+DEF_VF_BINOP_WIDEN_CASE_3 (float, double, +, add)
 
 /* { dg-final { scan-assembler {vfmadd.vf} } } */
 /* { dg-final { scan-assembler {vfmsub.vf} } } */
@@ -53,3 +54,4 @@ DEF_VF_BINOP_WIDEN_CASE_1 (float, double, +, add)
 /* { dg-final { scan-assembler {vfmax.vf} } } */
 /* { dg-final { scan-assembler {vfwmul.vf} } } */
 /* { dg-final { scan-assembler {vfwadd.vf} } } */
+/* { dg-final { scan-assembler {vfwadd.wf} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
index 069e82a27a5d..0650265b6c0c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f16.c
@@ -24,4 +24,5 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
 /* { dg-final { scan-assembler {fcvt.s.h} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
index 62e7aef82dd9..b43699deb83c 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf-4-f32.c
@@ -24,4 +24,5 @@
 /* { dg-final { scan-assembler-not {vfmax.vf} } } */
 /* { dg-final { scan-assembler-not {vfwmul.vf} } } */
 /* { dg-final { scan-assembler-not {vfwadd.vf} } } */
+/* { dg-final { scan-assembler-not {vfwadd.wf} } } */
 /* { dg-final { scan-assembler {fcvt.d.s} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
index 1d97ea4f4828..2a55c9c6df93 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop.h
@@ -253,4 +253,34 @@ DEF_MAX_1 (double)
       }                                                                        
\
   }
 
+#define DEF_VF_BINOP_WIDEN_CASE_2(T1, T2, OP, NAME)                            
\
+  void test_vf_binop_widen_##NAME##_##T1##_case_2 (T2 *restrict out,           
\
+                                                  T2 *restrict in, T1 f,      \
+                                                  unsigned n)                 \
+  {                                                                            
\
+    for (unsigned i = 0; i < n; i++)                                           
\
+      out[i] = (T2) f OP in[i];                                                
\
+  }
+#define DEF_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, OP, NAME)                       
\
+  DEF_VF_BINOP_WIDEN_CASE_2 (T1, T2, OP, NAME)
+#define RUN_VF_BINOP_WIDEN_CASE_2(T1, T2, NAME, out, in, f, n)                 
\
+  test_vf_binop_widen_##NAME##_##T1##_case_2 (out, in, f, n)
+#define RUN_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, NAME, out, in, f, n)            
\
+  RUN_VF_BINOP_WIDEN_CASE_2 (T1, T2, NAME, out, in, f, n)
+
+#define DEF_VF_BINOP_WIDEN_CASE_3(TYPE1, TYPE2, OP, NAME)                      
\
+  void test_vf_binop_widen_##NAME##_##TYPE1##_##TYPE2##_case_3 (               
\
+    TYPE2 *__restrict dst, TYPE2 *__restrict dst2, TYPE2 *__restrict dst3,     
\
+    TYPE2 *__restrict dst4, TYPE1 *__restrict a, TYPE2 *__restrict b,          
\
+    TYPE1 *__restrict a2, TYPE2 *__restrict b2, int n)                         
\
+  {                                                                            
\
+    for (int i = 0; i < n; i++)                                                
\
+      {                                                                        
\
+       dst[i] = (TYPE2) * a OP b[i];                                          \
+       dst2[i] = (TYPE2) * a2 OP b[i];                                        \
+       dst3[i] = (TYPE2) * a2 OP b2[i];                                       \
+       dst4[i] = (TYPE2) * a OP b2[i];                                        \
+      }                                                                        
\
+  }
+
 #endif
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
index 929b2dec85da..8748cda21aa9 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_binop_widen_run.h
@@ -5,19 +5,23 @@
 
 #define N 512
 
+#ifdef SINGLE
+#define TIN T2
+#else
+#define TIN T1
+#endif
+
 int main ()
 {
   T1 f;
-  T1 in[N];
+  TIN in[N];
   T2 out[N];
-  T2 out2[N];
 
   f = LIMIT % 8723;
   for (int i = 0; i < N; i++) 
     {
       in[i] = LIMIT + i & 1964;
       out[i] = LIMIT + i & 628;
-      out2[i] = LIMIT + i & 628;
       asm volatile ("" ::: "memory");
     }
 
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c
new file mode 100644
index 000000000000..d5400cc4a670
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f16.c
@@ -0,0 +1,21 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-require-effective-target riscv_zvfh_ok } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvfh" } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1    _Float16
+#define T2    float
+#define NAME add
+#define OP +
+
+DEF_VF_BINOP_WIDEN_CASE_2_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -32768
+#define SINGLE
+
+#include "vf_binop_widen_run.h"
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c
new file mode 100644
index 000000000000..630a153ff144
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vf_vfwadd-run-2-f32.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "--param=fpr2vr-cost=0" } */
+
+#include "vf_binop.h"
+
+#define T1    float
+#define T2    double
+#define NAME add
+#define OP +
+
+DEF_VF_BINOP_WIDEN_CASE_2_WRAP (T1, T2, OP, NAME)
+
+#define TEST_RUN(T1, T2, NAME, out, in, f, n) RUN_VF_BINOP_WIDEN_CASE_2_WRAP(T1, T2, NAME, out, in, f, n)
+#define LIMIT -2147483648
+#define SINGLE
+
+#include "vf_binop_widen_run.h"

Reply via email to