This patch extends the expanders for unpredicated smax, smin, add, sub, mul, min, and max, so that they support partial SVE FP modes.
The relevant insn/split patterns have also been updated. gcc/ChangeLog: * config/aarch64/aarch64-sve.md (<optab><mode>3): Extend from SVE_FULL_F to SVE_F, and use aarch64_sve_fp_pred. (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F to SVE_F, use aarch64_predicate_operand. (ADD/SUB/MUL/MAX/MIN). * config/aarch64/aarch64-sve2.md: Likewise, for BF16 operations. gcc/testsuite/ChangeLog: * g++.target/aarch64/sve/unpacked_binary_bf16_1.C: New test. * g++.target/aarch64/sve/unpacked_binary_bf16_2.C: Likewise. * gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fadd_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fadd_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmaxnm_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmaxnm_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fminnm_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fminnm_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmul_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fmul_2.c: Likewise. * gcc.target/aarch64/sve/unpacked_fsubr_1.c: Likewise. * gcc.target/aarch64/sve/unpacked_fsubr_2.c: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 70 +++++++++---------- gcc/config/aarch64/aarch64-sve2.md | 10 +-- .../aarch64/sve/unpacked_binary_bf16_1.C | 35 ++++++++++ .../aarch64/sve/unpacked_binary_bf16_2.C | 15 ++++ .../aarch64/sve/unpacked_builtin_fmax_1.c | 40 +++++++++++ .../aarch64/sve/unpacked_builtin_fmax_2.c | 16 +++++ .../aarch64/sve/unpacked_builtin_fmin_1.c | 40 +++++++++++ .../aarch64/sve/unpacked_builtin_fmin_2.c | 16 +++++ .../gcc.target/aarch64/sve/unpacked_fadd_1.c | 48 +++++++++++++ .../gcc.target/aarch64/sve/unpacked_fadd_2.c | 22 ++++++ .../aarch64/sve/unpacked_fmaxnm_1.c | 41 +++++++++++ .../aarch64/sve/unpacked_fmaxnm_2.c | 16 +++++ .../aarch64/sve/unpacked_fminnm_1.c | 42 +++++++++++ .../aarch64/sve/unpacked_fminnm_2.c | 16 +++++ .../gcc.target/aarch64/sve/unpacked_fmul_1.c | 39 +++++++++++ .../gcc.target/aarch64/sve/unpacked_fmul_2.c | 14 ++++ .../gcc.target/aarch64/sve/unpacked_fsubr_1.c | 42 +++++++++++ .../gcc.target/aarch64/sve/unpacked_fsubr_2.c | 16 +++++ 18 files changed, 498 insertions(+), 40 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 76de511420f..cdad900d9cf 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -5473,27 +5473,27 @@ ;; Split a predicated instruction whose predicate is unused into an ;; unpredicated instruction. (define_split - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] <SVE_COND_FP>))] "TARGET_SVE && reload_completed && INTVAL (operands[4]) == SVE_RELAXED_GP" [(set (match_dup 0) - (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16 (match_dup 2) (match_dup 3)))] + (SVE_UNPRED_FP_BINARY:SVE_F_B16B16 (match_dup 2) (match_dup 3)))] ) ;; Unpredicated floating-point binary operations (post-RA only). ;; These are generated by the split above. (define_insn "*post_ra_<sve_fp_op><mode>3" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w") - (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16 - (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand" "w")))] + [(set (match_operand:SVE_F_B16B16 0 "register_operand" "=w") + (SVE_UNPRED_FP_BINARY:SVE_F_B16B16 + (match_operand:SVE_F_B16B16 1 "register_operand" "w") + (match_operand:SVE_F_B16B16 2 "register_operand" "w")))] "TARGET_SVE && reload_completed" "<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>") @@ -5537,16 +5537,16 @@ ;; Unpredicated floating-point binary operations that need to be predicated ;; for SVE. (define_expand "<optab><mode>3" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_dup 3) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 1 "<sve_pred_fp_rhs1_operand>") - (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")] + (match_dup 4) + (match_operand:SVE_F_B16B16 1 "<sve_pred_fp_rhs1_operand>") + (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")] SVE_COND_FP_BINARY_OPTAB))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { - operands[3] = aarch64_ptrue_reg (<VPRED>mode); + operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]); } ) @@ -5870,12 +5870,12 @@ ;; Predicated floating-point addition. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand")] SVE_COND_FP_ADD))] "TARGET_SVE" {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ] @@ -6186,12 +6186,12 @@ ;; Predicated floating-point subtraction. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand") + (match_operand:SVE_F 3 "register_operand")] SVE_COND_FP_SUB))] "TARGET_SVE" {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ] @@ -6609,12 +6609,12 @@ ;; Predicated floating-point multiplication. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")] SVE_COND_FP_MUL))] "TARGET_SVE" {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ] @@ -6843,12 +6843,12 @@ ;; Predicated floating-point maximum/minimum. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand")] SVE_COND_FP_MAXMIN))] "TARGET_SVE" {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ] diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 62524f36de6..4f97659c585 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1337,12 +1337,12 @@ ;; Predicated B16B16 binary operations. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:VNx8BF_ONLY 0 "register_operand") - (unspec:VNx8BF_ONLY - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_BF 0 "register_operand") + (unspec:SVE_BF + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 4 "aarch64_sve_gp_strictness") - (match_operand:VNx8BF_ONLY 2 "register_operand") - (match_operand:VNx8BF_ONLY 3 "register_operand")] + (match_operand:SVE_BF 2 "register_operand") + (match_operand:SVE_BF 3 "register_operand")] SVE_COND_FP_BINARY_OPTAB))] "TARGET_SSVE_B16B16 && <supports_bf16>" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx , is_rev ] diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C new file mode 100644 index 00000000000..9d6342bbcd2 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_1.C @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -msve-vector-bits=2048" } */ + +#pragma GCC target "arch=armv9-a+sve-b16b16" + +#define ADD(a, b) a + b +#define SUB(a, b) a - b +#define MUL(a, b) a * b +#define MAX(a, b) (a > b) ? a : b +#define MIN(a, b) (a > b) ? b : a + +#define TEST_OP(TYPE, OP) \ + TYPE test_##TYPE##_##OP (TYPE a, TYPE b) { return OP (a, b); } \ + +#define TEST_ALL(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE, ADD) \ + TEST_OP (TYPE##SIZE, SUB) \ + TEST_OP (TYPE##SIZE, MUL) \ + TEST_OP (TYPE##SIZE, MIN) \ + TEST_OP (TYPE##SIZE, MAX) + +TEST_ALL (__bf16, 64) + +TEST_ALL (__bf16, 128) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 5 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 5 } } */ + +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m. z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C new file mode 100644 index 00000000000..63de2939e57 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_binary_bf16_2.C @@ -0,0 +1,15 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -ffinite-math-only -fno-signed-zeros -fno-trapping-math -msve-vector-bits=2048 " } */ + +#include "unpacked_binary_bf16_1.C" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 10 } } */ + +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c new file mode 100644 index 00000000000..368064bbe93 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_1.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1) + +TEST_ALL (__builtin_fmaxf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_fmaxf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_fmaxf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c new file mode 100644 index 00000000000..2a23d046cca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmax_2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_builtin_fmax_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c new file mode 100644 index 00000000000..d639b665de6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_1.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1) + +TEST_ALL (__builtin_fminf16, _Float16, uint64_t, 32) + +TEST_ALL (__builtin_fminf16, _Float16, uint32_t, 64) + +TEST_ALL (__builtin_fminf32, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c new file mode 100644 index 00000000000..9beb8402709 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_builtin_fmin_2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_builtin_fmin_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c new file mode 100644 index 00000000000..5745272bfb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_1.c @@ -0,0 +1,48 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define ADD(A, B) A + B + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, NAME, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##NAME (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], (TYPE0)RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i, b[i]) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, p5, 0.5) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, np5, -0.5) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, one, 1) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, none, -1) + +TEST_ALL (ADD, _Float16, uint64_t, 32) + +TEST_ALL (ADD, _Float16, uint32_t, 64) + +TEST_ALL (ADD, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 5 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 10 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c new file mode 100644 index 00000000000..acadedcda44 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fadd_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fadd_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 15 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c new file mode 100644 index 00000000000..2811339a5d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_1.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] +#define MAX(A, B) (A > B) ? A : B + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (c[i] = FN (a[i], RHS) ) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1) + +TEST_ALL (MAX, _Float16, uint64_t, 32) + +TEST_ALL (MAX, _Float16, uint32_t, 64) + +TEST_ALL (MAX, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c new file mode 100644 index 00000000000..de072abe2a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmaxnm_2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_fmaxnm_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c new file mode 100644 index 00000000000..d0d3262310c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] +#define MIN(A, B) (A < B) ? A : B + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (c[i] = FN (a[i], RHS) ) \ + out[i] = 3; \ + } + + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1) + +TEST_ALL (MIN, _Float16, uint64_t, 32) + +TEST_ALL (MIN, _Float16, uint32_t, 64) + +TEST_ALL (MIN, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c new file mode 100644 index 00000000000..75b73270fca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fminnm_2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-signed-zeros -ffinite-math-only -msve-vector-bits=2048 -ftree-vectorize -fno-trapping-math" } */ + +#include "unpacked_fminnm_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c new file mode 100644 index 00000000000..0cc7f460ae7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] +#define immp5 0.5 +#define MUL(A, B) A * B + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], (TYPE0)RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, immp5) + +TEST_ALL (MUL, _Float16, uint64_t, 32) + +TEST_ALL (MUL, _Float16, uint32_t, 64) + +TEST_ALL (MUL, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c new file mode 100644 index 00000000000..904ff88d6ee --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmul_2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fmul_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 6 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c new file mode 100644 index 00000000000..7c66b8255c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize" } */ + +#include <stdint.h> + +#define b_i b[i] +#define immp5 0.5 +#define SUBR(A, B) B - A + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, RHS) \ + void \ + f_##FN##_##TYPE0##_##TYPE1##_##RHS (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN (a[i], (TYPE0)RHS) > c[i]) \ + out[i] = 3; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, immp5) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 1) + +TEST_ALL (SUBR, _Float16, uint64_t, 32) + +TEST_ALL (SUBR, _Float16, uint32_t, 64) + +TEST_ALL (SUBR, float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 6 } } */ + +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c new file mode 100644 index 00000000000..66ee974866e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fsubr_2.c @@ -0,0 +1,16 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_fsubr_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 9 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 2 } } */ -- 2.34.1