[gcc r16-2468] aarch64: Relaxed SEL combiner patterns for unpacked SVE FP binary arithmetic

Spencer Abson via Gcc-cvs Thu, 24 Jul 2025 04:14:51 -0700

https://gcc.gnu.org/g:317c6a4e09614ec20a6290a8154c5359bc10bd5f


commit r16-2468-g317c6a4e09614ec20a6290a8154c5359bc10bd5f
Author: Spencer Abson <spencer.ab...@arm.com>
Date:   Tue Jul 8 12:49:42 2025 +0000

    aarch64: Relaxed SEL combiner patterns for unpacked SVE FP binary arithmetic
    
    Extend the binary op/UNSPEC_SEL combiner patterns from SVE_FULL_F/
    SVE_FULL_F_B16B16 to SVE_F/SVE_F_B16B16, where the strictness value
    is SVE_RELAXED_GP.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_relaxed):
            Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16.
            (*cond_<optab><mode>_3_relaxed): Likewise.
            (*cond_<optab><mode>_any_relaxed): Likewise.
            (*cond_<optab><mode>_2_const_relaxed): Extend from SVE_FULL_F
            to SVE_F.
            (*cond_<optab><mode>_any_const_relaxed): Likewise.
            (*cond_add<mode>_2_const_relaxed): Likewise.
            (*cond_add<mode>_any_const_relaxed): Likewise.
            (*cond_sub<mode>_3_const_relaxed): Likewise.
            (*cond_sub<mode>_const_relaxed): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C: New test.
            * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fadd_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fmul_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md                  | 98 +++++++++++-----------
 .../aarch64/sve/unpacked_cond_binary_bf16_1.C      | 46 ++++++++++
 .../aarch64/sve/unpacked_cond_builtin_fmax_1.c     | 51 +++++++++++
 .../aarch64/sve/unpacked_cond_builtin_fmin_1.c     | 51 +++++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fadd_1.c  | 62 ++++++++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c  | 47 +++++++++++
 .../aarch64/sve/unpacked_cond_fmaxnm_1.c           | 53 ++++++++++++
 .../aarch64/sve/unpacked_cond_fminnm_1.c           | 53 ++++++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fmul_1.c  | 50 +++++++++++
 .../gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c | 56 +++++++++++++
 10 files changed, 518 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index fc0f51e73a3e..b252eef411c2 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5621,14 +5621,14 @@
 
 ;; Predicated floating-point operations, merging with the first input.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+          (unspec:SVE_F_B16B16
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -5664,14 +5664,14 @@
 
 ;; Same for operations that take a 1-bit constant.
 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -5707,14 +5707,14 @@
 
 ;; Predicated floating-point operations, merging with the second input.
 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+          (unspec:SVE_F_B16B16
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
@@ -5750,16 +5750,16 @@
 
 ;; Predicated floating-point operations, merging with an independent value.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F_B16B16
+          (unspec:SVE_F_B16B16
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-             (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+             (match_operand:SVE_F_B16B16 2 "register_operand")
+             (match_operand:SVE_F_B16B16 3 "register_operand")]
             SVE_COND_FP_BINARY)
-          (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE
    && (<supports_bf16> || !<is_bf16>)
@@ -5832,16 +5832,16 @@
 
 ;; Same for operations that take a 1-bit constant.
 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1   , 2 , 4   ]
@@ -5928,14 +5928,14 @@
 ;; Predicated floating-point addition of a constant, merging with the
 ;; first input.
 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
@@ -5976,16 +5976,16 @@
 ;; Predicated floating-point addition of a constant, merging with an
 ;; independent value.
 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "register_operand")
-             (match_operand:SVE_FULL_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
+             (match_operand:SVE_F 2 "register_operand")
+             (match_operand:SVE_F 3 
"aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
   {@ [ cons: =0 , 1   , 2 , 3   , 4   ]
@@ -6243,14 +6243,14 @@
 ;; Predicated floating-point subtraction from a constant, merging with the
 ;; second input.
 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-             (match_operand:SVE_FULL_F 3 "register_operand")]
+             (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+             (match_operand:SVE_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
@@ -6287,16 +6287,16 @@
 ;; Predicated floating-point subtraction from a constant, merging with an
 ;; independent value.
 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_FULL_F
+          (unspec:SVE_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
-             (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
-             (match_operand:SVE_FULL_F 3 "register_operand")]
+             (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+             (match_operand:SVE_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
-          (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+          (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
   {@ [ cons: =0 , 1   , 3 , 4   ]
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C
new file mode 100644
index 000000000000..560d874cff7a
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_1.C
@@ -0,0 +1,46 @@
+/* { dg-do compile }*/
+/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fno-trapping-math 
-msve-vector-bits=2048 " } */
+
+#include <stdint.h>
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+#define ADD(a, b) a + b
+#define SUB(a, b) a - b
+#define MUL(a, b) a * b
+#define MAX(a, b) (a > b) ? a : b
+#define MIN(a, b) (a > b) ? b : a
+
+#define COND_OP(OP, TYPE, PRED_TYPE, ARG2, MERGE)                              
    \
+  TYPE test_##OP##_##TYPE##_##ARG2##_##MERGE (TYPE a, TYPE b, TYPE c, 
PRED_TYPE p) \
+  {return p ? OP (a, ARG2) : MERGE; }
+
+#define TEST_OP(OP, TYPE, PRED_TYPE, T)     \
+  T (OP, TYPE, PRED_TYPE, b, a)             \
+  T (OP, TYPE, PRED_TYPE, b, b)             \
+  T (OP, TYPE, PRED_TYPE, b, c)
+
+#define TEST_ALL(TYPE, PRED_TYPE, T) \
+  TEST_OP (ADD, TYPE, PRED_TYPE, T)  \
+  TEST_OP (SUB, TYPE, PRED_TYPE, T)  \
+  TEST_OP (MUL, TYPE, PRED_TYPE, T)  \
+  TEST_OP (MAX, TYPE, PRED_TYPE, T)  \
+  TEST_OP (MIN, TYPE, PRED_TYPE, T)
+
+#define TEST(TYPE, PTYPE, SIZE)                                   \
+  typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE)));   \
+  typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \
+  TEST_ALL (TYPE##SIZE, PTYPE##SIZE, COND_OP)
+
+TEST (__bf16, uint16_t, 128)
+
+TEST (__bf16, uint16_t, 64)
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+
+// There's no BFSUBR.
+/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c
new file mode 100644
index 000000000000..d328b3713426
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 
-fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
+
+TEST_ALL (__builtin_fmaxf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fmaxf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fmaxf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c
new file mode 100644
index 000000000000..1821f0370f0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 
-fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
+
+TEST_ALL (__builtin_fminf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fminf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fminf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c
new file mode 100644
index 000000000000..666cf89df435
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_1.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 
-fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+#define imm_p5 0.5
+
+#define ADD(A, B) A + B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, NAME, RHS, MERGE)      \
+  void                                                          \
+  f_##TYPE0##_##TYPE1##_##NAME##_##MERGE (TYPE0 *__restrict out, \
+                                        TYPE0 *__restrict a,    \
+                                        TYPE0 *__restrict b,    \
+                                        TYPE0 *__restrict c,    \
+                                        TYPE1 *__restrict p)    \
+  {                                                             \
+    for (unsigned int i = 0; i < COUNT; i++)                    \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;            \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)           \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i], c_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1, a_i)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1, b_i)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1, b_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5, a_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5, b_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5, a_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5, b_i)
+
+TEST_ALL (ADD, _Float16, uint64_t, 32)
+
+TEST_ALL (ADD, _Float16, uint32_t, 64)
+
+TEST_ALL (ADD, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 5 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c
new file mode 100644
index 000000000000..ec5653e7a472
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 
-fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define DIV(A, B) A / B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)       \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i)
+
+TEST_ALL (DIV, _Float16, uint64_t, 32)
+
+TEST_ALL (DIV, _Float16, uint32_t, 64)
+
+TEST_ALL (DIV, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c
new file mode 100644
index 000000000000..d34872f9d899
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_1.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 
-fno-signed-zeros -ffinite-math-only -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define MAX(A, B) (A > B) ? A : B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
+
+TEST_ALL (MAX, _Float16, uint64_t, 32)
+
+TEST_ALL (MAX, _Float16, uint32_t, 64)
+
+TEST_ALL (MAX, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c
new file mode 100644
index 000000000000..d6c3c38fc1a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_1.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 
-fno-signed-zeros -ffinite-math-only -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+
+#define MIN(A, B) (A < B) ? A : B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0, b_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)
+
+TEST_ALL (MIN, _Float16, uint64_t, 32)
+
+TEST_ALL (MIN, _Float16, uint32_t, 64)
+
+TEST_ALL (MIN, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c
new file mode 100644
index 000000000000..1ae7678d22d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_1.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+#define imm_p5 0.5
+
+#define MUL(A, B) A * B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)       \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, a_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, b_i)
+
+TEST_ALL (MUL, _Float16, uint64_t, 32)
+
+TEST_ALL (MUL, _Float16, uint32_t, 64)
+
+TEST_ALL (MUL, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c
new file mode 100644
index 000000000000..eafd1690eebe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include <stdint.h>
+
+#define a_i a[i]
+#define b_i b[i]
+#define c_i c[i]
+#define imm_p5 0.5
+
+#define SUBR(A, B) B - A
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS, MERGE)           \
+  void                                                         \
+  f_##TYPE0##_##TYPE1##_##RHS##_##MERGE (TYPE0 *__restrict out,        \
+                                        TYPE0 *__restrict a,   \
+                                        TYPE0 *__restrict b,   \
+                                        TYPE0 *__restrict c,   \
+                                        TYPE1 *__restrict p)   \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      out[i] = p[i] ? FN (a[i], (TYPE0)RHS) : MERGE;           \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)       \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, a_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, c_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, a_i)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1, b_i)     \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, a_i) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, imm_p5, b_i)
+
+TEST_ALL (SUBR, _Float16, uint64_t, 32)
+
+TEST_ALL (SUBR, _Float16, uint32_t, 64)
+
+TEST_ALL (SUBR, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */

[gcc r16-2468] aarch64: Relaxed SEL combiner patterns for unpacked SVE FP binary arithmetic

Reply via email to