This patch extends the define_insn_and_split patterns that combine FP comparisons with predicated logical operations so that they cover every mode in SVE_F (including unpacked vectors) rather than only the modes in SVE_FULL_F.
gcc/ChangeLog: * config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>_and_combine): Extend from SVE_FULL_F to SVE_F. (*fcmuo<mode>_and_combine): Likewise. (*fcm<cmp_op><mode>_bic_combine): Likewise. (*fcm<cmp_op><mode>_nor_combine): Likewise. (*fcmuo<mode>_bic_combine): Likewise. (*fcmuo<mode>_nor_combine): Likewise. Move the comment here to above fcmuo<mode>_bic_combine, since it applies to both patterns. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/unpacked_fcm_combines_1.c: New test. * gcc.target/aarch64/sve/unpacked_fcm_combines_2.c: Likewise. --- gcc/config/aarch64/aarch64-sve.md | 26 +++++++------- .../aarch64/sve/unpacked_fcm_combines_1.c | 17 +++++++++ .../aarch64/sve/unpacked_fcm_combines_2.c | 35 +++++++++++++++++++ 3 files changed, 65 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 6b5113eb70f..10aecf1f190 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -8690,8 +8690,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w, w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] + (match_operand:SVE_F 2 "register_operand" "w, w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] SVE_COND_FP_CMP_I0) (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] "TARGET_SVE" @@ -8713,8 +8713,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "register_operand" "w")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO) (match_operand:<VPRED> 4 "register_operand" "Upl")))] "TARGET_SVE" @@ -8740,8 +8740,8 @@ 
(unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")] SVE_COND_FP_CMP_I0)) (match_operand:<VPRED> 4 "register_operand" "Upa")) (match_dup:<VPRED> 1))) @@ -8777,8 +8777,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")] SVE_COND_FP_CMP_I0)) (not:<VPRED> (match_operand:<VPRED> 4 "register_operand" "Upa"))) @@ -8808,6 +8808,7 @@ } ) +;; Same for unordered comparisons. (define_insn_and_split "*fcmuo<mode>_bic_combine" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (and:<VPRED> @@ -8816,8 +8817,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "register_operand" "w")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO)) (match_operand:<VPRED> 4 "register_operand" "Upa")) (match_dup:<VPRED> 1))) @@ -8843,7 +8844,6 @@ } ) -;; Same for unordered comparisons. 
(define_insn_and_split "*fcmuo<mode>_nor_combine" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (and:<VPRED> @@ -8852,8 +8852,8 @@ (unspec:<VPRED> [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) - (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "register_operand" "w")] + (match_operand:SVE_F 2 "register_operand" "w") + (match_operand:SVE_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO)) (not:<VPRED> (match_operand:<VPRED> 4 "register_operand" "Upa"))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c new file mode 100644 index 00000000000..c1f729e9f0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */ + +#include "unpacked_fcm_1.c" + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 32 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 32 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 32 } } */ + +/* Drop a PTRUE predicated AND with the loop mask and comparison result in + favour of predicating the comparison with the loop mask. */ +/* { dg-final { scan-assembler-not {\tand\t} } } */ + +/* Similarly, for codes that are implemented via an inversion, prefer + NOT (predicated with the loop mask) over BIC+PTRUE. 
*/ +/* { dg-final { scan-assembler-not {\tbic\t} } } */ +/* { dg-final { scan-assembler-times {\tnot\t} 15 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c new file mode 100644 index 00000000000..e7f7680ce53 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */ + +#include <stdint.h> + +/* Ensure that we still emit NOR here, rather than two NOTs. */ + +#define TEST_FCM_NOR(TYPE0, TYPE1, CMP, COUNT) \ + void \ + f_##TYPE0##_##TYPE1##_##CMP (TYPE0 *__restrict out, \ + TYPE1 *__restrict a, \ + TYPE1 *__restrict b, \ + TYPE1 *__restrict c) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = !(CMP (a[i], c[i]) | CMP (b[i], c[i])) ? 3 : out[i]; \ + } + +#define GT(A, B) ((A) > (B)) + +TEST_FCM_NOR (uint64_t, float, GT, 32) +TEST_FCM_NOR (uint64_t, _Float16, GT, 32) +TEST_FCM_NOR (uint32_t, _Float16, GT, 64) + +TEST_FCM_NOR (uint64_t, float, __builtin_isunordered, 32) +TEST_FCM_NOR (uint64_t, _Float16, __builtin_isunordered, 32) +TEST_FCM_NOR (uint32_t, _Float16, __builtin_isunordered, 64) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */ + +/* { dg-final { scan-assembler-not {\tbic\t} } } */ +/* { dg-final { scan-assembler-not {\tnot\t} } } */ +/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */ -- 2.34.1