[gcc r16-2466] aarch64: Add support for unpacked SVE FP binary arithmetic

Spencer Abson via Gcc-cvs Thu, 24 Jul 2025 02:45:08 -0700

https://gcc.gnu.org/g:455257a4fff139268584d1a40440dee978659d0e


commit r16-2466-g455257a4fff139268584d1a40440dee978659d0e
Author: Spencer Abson <spencer.ab...@arm.com>
Date:   Tue Jul 8 10:51:33 2025 +0000

    aarch64: Add support for unpacked SVE FP binary arithmetic
    
    This patch extends the expanders for unpredicated smax, smin, add, sub,
    mul, min, and max, so that they support partial SVE FP modes.
    
    The relevant insn and splitting patterns are also updated.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64-sve.md (<optab><mode>3): Extend from
            SVE_FULL_F to SVE_F, use aarch64_sve_fp_pred.
            (*post_ra_<sve_fp_op><mode>3): Extend from SVE_FULL_F to SVE_F.
            (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F,
            use aarch64_predicate_operand (ADD/SUB/MUL/MAX/MIN).
            (split for using unpredicated insns): Move SVE_RELAXED_GP into
            the pattern, rather than testing for it in the condition.
            * config/aarch64/aarch64-sve2.md (@aarch64_pred_<optab><mode>):
            Extend from VNx8BF_ONLY to SVE_BF.
    
    gcc/testsuite/ChangeLog:
    
            * g++.target/aarch64/sve/unpacked_binary_bf16_1.C: New test.
            * g++.target/aarch64/sve/unpacked_binary_bf16_2.C: Likewise.
            * gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fadd_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fadd_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fmaxnm_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fmaxnm_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fminnm_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fminnm_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fmul_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fmul_2.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fsubr_1.c: Likewise.
            * gcc.target/aarch64/sve/unpacked_fsubr_2.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve.md                  | 76 +++++++++++-----------
 gcc/config/aarch64/aarch64-sve2.md                 | 10 +--
 .../aarch64/sve/unpacked_binary_bf16_1.C           | 35 ++++++++++
 .../aarch64/sve/unpacked_binary_bf16_2.C           | 15 +++++
 .../aarch64/sve/unpacked_builtin_fmax_1.c          | 44 +++++++++++++
 .../aarch64/sve/unpacked_builtin_fmax_2.c          | 20 ++++++
 .../aarch64/sve/unpacked_builtin_fmin_1.c          | 44 +++++++++++++
 .../aarch64/sve/unpacked_builtin_fmin_2.c          | 20 ++++++
 .../gcc.target/aarch64/sve/unpacked_fadd_1.c       | 52 +++++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fadd_2.c       | 26 ++++++++
 .../gcc.target/aarch64/sve/unpacked_fmaxnm_1.c     | 45 +++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fmaxnm_2.c     | 20 ++++++
 .../gcc.target/aarch64/sve/unpacked_fminnm_1.c     | 46 +++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fminnm_2.c     | 20 ++++++
 .../gcc.target/aarch64/sve/unpacked_fmul_1.c       | 43 ++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fmul_2.c       | 18 +++++
 .../gcc.target/aarch64/sve/unpacked_fsubr_1.c      | 46 +++++++++++++
 .../gcc.target/aarch64/sve/unpacked_fsubr_2.c      | 20 ++++++
 18 files changed, 556 insertions(+), 44 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md 
b/gcc/config/aarch64/aarch64-sve.md
index 66dd5809bcda..a52ef61098bb 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5511,27 +5511,25 @@
 ;; Split a predicated instruction whose predicate is unused into an
 ;; unpredicated instruction.
 (define_split
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_operand:<VPRED> 1 "register_operand")
-          (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
-          (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+          (const_int SVE_RELAXED_GP)
+          (match_operand:SVE_F_B16B16 2 "register_operand")
+          (match_operand:SVE_F_B16B16 3 "register_operand")]
          <SVE_COND_FP>))]
-  "TARGET_SVE
-   && reload_completed
-   && INTVAL (operands[4]) == SVE_RELAXED_GP"
+  "TARGET_SVE && reload_completed"
   [(set (match_dup 0)
-       (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16 (match_dup 2) (match_dup 3)))]
+       (SVE_UNPRED_FP_BINARY:SVE_F_B16B16 (match_dup 2) (match_dup 3)))]
 )
 
 ;; Unpredicated floating-point binary operations (post-RA only).
 ;; These are generated by the split above.
 (define_insn "*post_ra_<sve_fp_op><mode>3"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
-       (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16
-         (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")
-         (match_operand:SVE_FULL_F_B16B16 2 "register_operand" "w")))]
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand" "=w")
+       (SVE_UNPRED_FP_BINARY:SVE_F_B16B16
+         (match_operand:SVE_F_B16B16 1 "register_operand" "w")
+         (match_operand:SVE_F_B16B16 2 "register_operand" "w")))]
   "TARGET_SVE && reload_completed"
   "<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
 
@@ -5575,16 +5573,16 @@
 ;; Unpredicated floating-point binary operations that need to be predicated
 ;; for SVE.
 (define_expand "<optab><mode>3"
-  [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
-       (unspec:SVE_FULL_F_B16B16
+  [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+       (unspec:SVE_F_B16B16
          [(match_dup 3)
-          (const_int SVE_RELAXED_GP)
-          (match_operand:SVE_FULL_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
-          (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
+          (match_dup 4)
+          (match_operand:SVE_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
+          (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
          SVE_COND_FP_BINARY_OPTAB))]
   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
   {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
   }
 )
 
@@ -5908,12 +5906,12 @@
 
 ;; Predicated floating-point addition.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 
"aarch64_sve_float_arith_with_sub_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand")]
          SVE_COND_FP_ADD))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , %2 , 3   , 4   ; attrs: movprfx ]
@@ -6224,12 +6222,12 @@
 
 ;; Predicated floating-point subtraction.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
-          (match_operand:SVE_FULL_F 3 "register_operand")]
+          (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand")
+          (match_operand:SVE_F 3 "register_operand")]
          SVE_COND_FP_SUB))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2   , 3 , 4   ; attrs: movprfx ]
@@ -6647,12 +6645,12 @@
 
 ;; Predicated floating-point multiplication.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")]
          SVE_COND_FP_MUL))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , %2 , 3   , 4   ; attrs: movprfx ]
@@ -6881,12 +6879,12 @@
 
 ;; Predicated floating-point maximum/minimum.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:SVE_FULL_F 0 "register_operand")
-       (unspec:SVE_FULL_F
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_F 0 "register_operand")
+       (unspec:SVE_F
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:SVE_FULL_F 2 "register_operand")
-          (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
+          (match_operand:SVE_F 2 "register_operand")
+          (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand")]
          SVE_COND_FP_MAXMIN))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , %2 , 3   ; attrs: movprfx ]
diff --git a/gcc/config/aarch64/aarch64-sve2.md 
b/gcc/config/aarch64/aarch64-sve2.md
index 8c03e28cb084..31bdd85ddb2a 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1346,12 +1346,12 @@
 
 ;; Predicated B16B16 binary operations.
 (define_insn "@aarch64_pred_<optab><mode>"
-  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
-       (unspec:VNx8BF_ONLY
-         [(match_operand:<VPRED> 1 "register_operand")
+  [(set (match_operand:SVE_BF 0 "register_operand")
+       (unspec:SVE_BF
+         [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
-          (match_operand:VNx8BF_ONLY 2 "register_operand")
-          (match_operand:VNx8BF_ONLY 3 "register_operand")]
+          (match_operand:SVE_BF 2 "register_operand")
+          (match_operand:SVE_BF 3 "register_operand")]
          SVE_COND_FP_BINARY_OPTAB))]
   "TARGET_SSVE_B16B16 && <supports_bf16>"
   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx , is_rev ]
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C
new file mode 100644
index 000000000000..9d6342bbcd21
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only 
-msve-vector-bits=2048" } */
+
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+#define ADD(a, b) a + b
+#define SUB(a, b) a - b
+#define MUL(a, b) a * b
+#define MAX(a, b) (a > b) ? a : b
+#define MIN(a, b) (a > b) ? b : a
+
+#define TEST_OP(TYPE, OP)                                        \
+  TYPE test_##TYPE##_##OP (TYPE a, TYPE b) { return OP (a, b); } \
+
+#define TEST_ALL(TYPE, SIZE)                                        \
+  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE)));       \
+  TEST_OP (TYPE##SIZE, ADD)                                         \
+  TEST_OP (TYPE##SIZE, SUB)                                         \
+  TEST_OP (TYPE##SIZE, MUL)                                         \
+  TEST_OP (TYPE##SIZE, MIN)                                         \
+  TEST_OP (TYPE##SIZE, MAX)
+
+TEST_ALL (__bf16, 64)
+
+TEST_ALL (__bf16, 128)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 5 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m. 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C
new file mode 100644
index 000000000000..63de2939e579
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C
@@ -0,0 +1,15 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2 -ffinite-math-only -fno-signed-zeros -fno-trapping-math 
-msve-vector-bits=2048 " } */
+
+#include "unpacked_binary_bf16_1.C"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c
new file mode 100644
index 000000000000..e6aa047a6456
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], RHS) > c[i])                               \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (__builtin_fmaxf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fmaxf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fmaxf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c
new file mode 100644
index 000000000000..87125a606a28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_builtin_fmax_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c
new file mode 100644
index 000000000000..b9fded0db642
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], RHS) > c[i])                               \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (__builtin_fminf16, _Float16, uint64_t, 32)
+
+TEST_ALL (__builtin_fminf16, _Float16, uint32_t, 64)
+
+TEST_ALL (__builtin_fminf32, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c
new file mode 100644
index 000000000000..5923b673a2bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_builtin_fmin_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c
new file mode 100644
index 000000000000..9675f56d53ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define ADD(A, B)  A + B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, NAME, RHS)            \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##NAME (TYPE1 *__restrict out,  \
+                                      TYPE0 *__restrict a,     \
+                                      TYPE0 *__restrict b,     \
+                                       TYPE0 *__restrict c)    \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)      \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i]) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1)
+
+TEST_ALL (ADD, _Float16, uint64_t, 32)
+
+TEST_ALL (ADD, _Float16, uint32_t, 64)
+
+TEST_ALL (ADD, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 10 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 11 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c
new file mode 100644
index 000000000000..7a74efd129cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fadd_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 11 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 11 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c
new file mode 100644
index 000000000000..5239e4ba0b95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only 
-moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define MAX(A, B) (A > B) ? A : B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (c[i] = FN (a[i], RHS))                               \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (MAX, _Float16, uint64_t, 32)
+
+TEST_ALL (MAX, _Float16, uint32_t, 64)
+
+TEST_ALL (MAX, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c
new file mode 100644
index 000000000000..11aa7c0b6709
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -fno-trapping-math 
-moverride=sve_width=2048" } */
+
+#include "unpacked_fmaxnm_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c
new file mode 100644
index 000000000000..02a5f462caf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only 
-moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define MIN(A, B) (A < B) ? A : B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (c[i] = FN (a[i], RHS) )                              \
+       out[i] = 3;                                             \
+  }
+
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 0)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (MIN, _Float16, uint64_t, 32)
+
+TEST_ALL (MIN, _Float16, uint32_t, 64)
+
+TEST_ALL (MIN, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c
new file mode 100644
index 000000000000..81f583b3fac6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -fno-trapping-math 
-moverride=sve_width=2048" } */
+
+#include "unpacked_fminnm_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.0\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c
new file mode 100644
index 000000000000..a180a071ea6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define immp5 0.5
+#define MUL(A, B) A * B
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, immp5)
+
+TEST_ALL (MUL, _Float16, uint64_t, 32)
+
+TEST_ALL (MUL, _Float16, uint32_t, 64)
+
+TEST_ALL (MUL, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 5 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c
new file mode 100644
index 000000000000..eb05600e98a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fmul_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 5 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c
new file mode 100644
index 000000000000..2cc8ec29114f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -moverride=sve_width=2048" } */
+
+#include <stdint.h>
+
+#define b_i b[i]
+#define immp5 0.5
+#define SUBR(A, B) B - A
+
+#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS)                  \
+  void                                                         \
+  f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out,   \
+                                     TYPE0 *__restrict a,      \
+                                     TYPE0 *__restrict b,      \
+                                      TYPE0 *__restrict c)     \
+  {                                                            \
+    for (unsigned int i = 0; i < COUNT; i++)                   \
+      if (FN (a[i], (TYPE0)RHS) > c[i])                                \
+       out[i] = 3;                                             \
+  }
+
+#define TEST_ALL(FN, TYPE0, TYPE1, COUNT)   \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)    \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, immp5)  \
+  TEST_FN (FN, TYPE0, TYPE1, COUNT, 1)
+
+TEST_ALL (SUBR, _Float16, uint64_t, 32)
+
+TEST_ALL (SUBR, _Float16, uint32_t, 64)
+
+TEST_ALL (SUBR, float, uint64_t, 32)
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr?\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsubr?\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c
new file mode 100644
index 000000000000..de9325ccacb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2 -moverride=sve_width=2048 -fno-trapping-math" } */
+
+#include "unpacked_fsubr_1.c"
+
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 6 } } */
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #0.5\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, #1.0\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, 
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #0.5\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, 
z[0-9]+\.h, #1.0\n} 2 } } */

[gcc r16-2466] aarch64: Add support for unpacked SVE FP binary arithmetic

Reply via email to