This patch extends the expander for fma, fnma, fms, and fnms to support
partial SVE FP modes.
We add the missing BF16 tests, which we can now trigger for having
implemented the conditional expander.
We also add tests for the 'merging with multiplicand' case, which this
expander canonicalizes (albeit under SVE_STRICT_GP).
gcc/ChangeLog:
* config/aarch64/aarch64-sve.md (@cond_<optab><mode>): Extend
to support partial FP modes.
(*cond_<optab><mode>_2_strict): Extend from SVE_FULL_F to SVE_F,
use aarch64_predicate_operand.
(*cond_<optab><mode>_4_strict): Extend from SVE_FULL_F_B16B16 to
SVE_F_B16B16, use aarch64_predicate_operand.
(*cond_<optab><mode>_any_strict): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: Add test cases
for merging with multiplcand.
* gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fmla_2.c: New test.
* gcc.target/aarch64/sve/unpacked_cond_fmls_2.c: Likewise.
* gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c: Likewise..
* gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c: Likewise.
* g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C: Likewise.
* g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C: Likewise.
---
gcc/config/aarch64/aarch64-sve.md | 61 ++++++++++---------
.../sve/unpacked_cond_ternary_bf16_1.C | 35 +++++++++++
.../sve/unpacked_cond_ternary_bf16_2.C | 14 +++++
.../aarch64/sve/unpacked_cond_fmla_1.c | 4 ++
.../aarch64/sve/unpacked_cond_fmla_2.c | 18 ++++++
.../aarch64/sve/unpacked_cond_fmls_1.c | 4 ++
.../aarch64/sve/unpacked_cond_fmls_2.c | 18 ++++++
.../aarch64/sve/unpacked_cond_fnmla_1.c | 4 ++
.../aarch64/sve/unpacked_cond_fnmla_2.c | 18 ++++++
.../aarch64/sve/unpacked_cond_fnmls_1.c | 4 ++
.../aarch64/sve/unpacked_cond_fnmls_2.c | 18 ++++++
11 files changed, 169 insertions(+), 29 deletions(-)
create mode 100644
gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
create mode 100644
gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
diff --git a/gcc/config/aarch64/aarch64-sve.md
b/gcc/config/aarch64/aarch64-sve.md
index e5443980e8b..278f78960a6 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -7599,17 +7599,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7617,6 +7617,9 @@
second of the two. */
if (rtx_equal_p (operands[3], operands[5]))
std::swap (operands[2], operands[3]);
+
+ if (rtx pred = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]))
+ operands[1] = pred;
})
;; Predicated floating-point ternary operations, merging with the
@@ -7646,15 +7649,15 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7692,15 +7695,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7760,17 +7763,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
diff --git
a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
new file mode 100644
index 00000000000..3a203184dab
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C
@@ -0,0 +1,35 @@
+/* { dg-do compile }*/
+/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048 " } */
+
+#include <stdint.h>
+#pragma GCC target "arch=armv9-a+sve-b16b16"
+
+#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \
+ TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \
+ {return p ? a * b + c : MERGE; }
+
+#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \
+ TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \
+ {return p ? a * -b + c : MERGE; }
+
+#define TEST_OP(TYPE, PRED_TYPE, T) \
+ T (TYPE, PRED_TYPE, c) \
+ T (TYPE, PRED_TYPE, 5)
+
+#define TEST(TYPE, PTYPE, SIZE) \
+ typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \
+ typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \
+ TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \
+ TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS)
+
+TEST (__bf16, uint16_t, 128)
+
+TEST (__bf16, uint16_t, 64)
+
+/* { dg-final { scan-assembler-times {\tptrue} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git
a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
new file mode 100644
index 00000000000..bc47ece4c59
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_ternary_bf16_1.C"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */
+/* { dg-final { scan-assembler-times {\tand} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
index e2a5bdab1dd..411ed821811 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c
@@ -25,6 +25,8 @@
}
#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, 5)
@@ -35,9 +37,11 @@ TEST_ALL (FMLA (f16), _Float16, uint32_t, 64)
TEST_ALL (FMLA (f32), float, uint64_t, 32)
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
new file mode 100644
index 00000000000..afa01902c35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fmla_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
index 5a83fab8ec7..5a9122d207f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c
@@ -25,6 +25,8 @@
}
#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, 5)
@@ -35,9 +37,11 @@ TEST_ALL (FMLS (f16), _Float16, uint32_t, 64)
TEST_ALL (FMLS (f32), float, uint64_t, 32)
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
new file mode 100644
index 00000000000..2c0407af0f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fmls_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
index 5d4eff806dd..8219c007dec 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c
@@ -25,6 +25,8 @@
}
#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, 5)
@@ -35,9 +37,11 @@ TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64)
TEST_ALL (FNMLA (f32), float, uint64_t, 32)
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
new file mode 100644
index 00000000000..847a5be28c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fnmla_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
index 5569bbabd7d..811d6145fdd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c
@@ -25,6 +25,8 @@
}
#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \
+ TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \
TEST_FN (FN, TYPE0, TYPE1, COUNT, 5)
@@ -35,9 +37,11 @@ TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64)
TEST_ALL (FNMLS (f32), float, uint64_t, 32)
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
new file mode 100644
index 00000000000..423e0732573
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
+
+#include "unpacked_cond_fnmls_1.c"
+
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */
+/* { dg-final { scan-assembler-times {\tand} 12 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m,
z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m,
z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
+
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
--
2.34.1