Spencer Abson <spencer.ab...@arm.com> writes:
> Extend the unary op/UNSPEC_SEL combiner patterns from SVE_FULL_F to SVE_F,
> where the strictness value is SVE_RELAXED_GP.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_relaxed):
>       Extend from SVE_FULL_F to SVE_F.
>       (*cond_<optab><mode>_any_relaxed): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/unpacked_cond_fabs_1.c: New test.
>       * gcc.target/aarch64/sve/unpacked_cond_fneg_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_frinta_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_frinti_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_frintm_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_frintp_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_frintx_1.c: Likewise.
>       * gcc.target/aarch64/sve/unpacked_cond_frintz_1.c: Likewise.

It might be good to have at least one test for the trapping case,
to make sure that the SELs are still present.  It doesn't need to be
for all FRINT variants, just one would be enough.

OK with that change, thanks.

Richard

> ---
>  gcc/config/aarch64/aarch64-sve.md             | 18 +++++-----
>  .../aarch64/sve/unpacked_cond_fabs_1.c        | 32 +++++++++++++++++
>  .../aarch64/sve/unpacked_cond_fneg_1.c        | 34 +++++++++++++++++++
>  .../aarch64/sve/unpacked_cond_frinta_1.c      | 32 +++++++++++++++++
>  .../aarch64/sve/unpacked_cond_frinti_1.c      | 32 +++++++++++++++++
>  .../aarch64/sve/unpacked_cond_frintm_1.c      | 32 +++++++++++++++++
>  .../aarch64/sve/unpacked_cond_frintp_1.c      | 32 +++++++++++++++++
>  .../aarch64/sve/unpacked_cond_frintx_1.c      | 32 +++++++++++++++++
>  .../aarch64/sve/unpacked_cond_frintz_1.c      | 32 +++++++++++++++++
>  9 files changed, 267 insertions(+), 9 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 1a705e153cb..76de511420f 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -3821,13 +3821,13 @@
>  
>  ;; Predicated floating-point unary arithmetic, merging with the first input.
>  (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 3)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "register_operand")]
> +           (match_operand:SVE_F 2 "register_operand")]
>            SVE_COND_FP_UNARY)
>          (match_dup 2)]
>         UNSPEC_SEL))]
> @@ -3869,15 +3869,15 @@
>  ;; as earlyclobber helps to make the instruction more regular to the
>  ;; register allocator.
>  (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
> -  [(set (match_operand:SVE_FULL_F 0 "register_operand")
> -     (unspec:SVE_FULL_F
> +  [(set (match_operand:SVE_F 0 "register_operand")
> +     (unspec:SVE_F
>         [(match_operand:<VPRED> 1 "register_operand")
> -        (unspec:SVE_FULL_F
> +        (unspec:SVE_F
>            [(match_operand 4)
>             (const_int SVE_RELAXED_GP)
> -           (match_operand:SVE_FULL_F 2 "register_operand")]
> +           (match_operand:SVE_F 2 "register_operand")]
>            SVE_COND_FP_UNARY)
> -        (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
> +        (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
>         UNSPEC_SEL))]
>    "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
>    {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c
> new file mode 100644
> index 00000000000..fea5cd1f50d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fabs_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_fabsf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_fabsf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_fabsf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c
> new file mode 100644
> index 00000000000..b5260a60946
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fneg_1.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=2048" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define NEG(X) -X
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (NEG, _Float16, uint64_t, 32)
> +
> +TEST_ALL (NEG, _Float16, uint32_t, 64)
> +
> +TEST_ALL (NEG, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c
> new file mode 100644
> index 00000000000..40b268a98fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinta_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_roundf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_roundf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_roundf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfrinta\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c
> new file mode 100644
> index 00000000000..5690811e510
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frinti_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_nearbyintf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_nearbyintf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_nearbyintf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfrinti\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c
> new file mode 100644
> index 00000000000..34832282d24
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintm_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_floorf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_floorf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_floorf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfrintm\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c
> new file mode 100644
> index 00000000000..36b1972ad7b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintp_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_ceilf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_ceilf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_ceilf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfrintp\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c
> new file mode 100644
> index 00000000000..a8314c3458f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintx_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_rintf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_rintf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_rintf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfrintx\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c
> new file mode 100644
> index 00000000000..d2cfc6be455
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_frintz_1.c
> @@ -0,0 +1,32 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msve-vector-bits=2048 -ftree-vectorize 
> -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +#define a_i a[i]
> +#define b_i b[i]
> +
> +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE)                      \
> +  void                                                               \
> +  f_##FN##_##TYPE0##_##TYPE1##_##MERGE (TYPE1 *__restrict p, \
> +                                     TYPE0 *__restrict a,    \
> +                                     TYPE0 *__restrict b)    \
> +  {                                                          \
> +    for (unsigned int i = 0; i < COUNT; i++)                 \
> +      a[i] = p[i] ? FN (a[i]) : MERGE;                               \
> +  }
> +
> +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i)  \
> +  TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i)
> +
> +TEST_ALL (__builtin_truncf16, _Float16, uint64_t, 32)
> +
> +TEST_ALL (__builtin_truncf16, _Float16, uint32_t, 64)
> +
> +TEST_ALL (__builtin_truncf32, float, uint64_t, 32)
> +
> +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.s, p[0-7]/m, 
> z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tfrintz\tz[0-9]+\.h, p[0-7]/m, 
> z[0-9]+\.h\n} 4 } } */
> +
> +/* { dg-final { scan-assembler-not {\tsel\t} } } */

Reply via email to