Kyrylo Tkachov <ktkac...@nvidia.com> writes:
> Hi all,
>
> While the SVE2 NBSL instruction accepts MOVPRFX to add more flexibility
> due to its tied operands, the destination of the movprfx cannot be also
> a source operand. But the offending pattern in aarch64-sve2.md tries
> to do exactly that for the "=?&w,w,w" alternative and gas warns for the
> attached testcase.
>
> This patch just removes that alternative causing RA to emit a normal extra
> move.
> So for the testcase in the patch we now generate:
> nor_z:
>       nbsl z1.d, z1.d, z2.d, z1.d
>       mov z0.d, z1.d
>       ret
>
> instead of the previous:
> nor_z:
>       movprfx z0, z1
>       nbsl z0.d, z0.d, z2.d, z0.d
>       ret
>
> which generated a gas warning.

Shouldn't we instead change it to:

     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, 
%2.d, %1.d

?  The "&" ensures that %1 is still valid in the NBSL.

(That's OK if it works.)

Thanks,
Richard

> Bootstrapped and tested on aarch64-none-linux-gnu.
> Ok for trunk?
> Do we want to backport it?
>
> Thanks,
> Kyrill
>
>
> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>
> gcc/
>
> PR target/120999
>       * config/aarch64/aarch64-sve2.md (*aarch64_sve2_nor<mode>):
>       Remove movprfx alternative.
>
> gcc/testsuite/
>
>       PR target/120999
>       * gcc.target/aarch64/sve2/pr120999.c: New test.
>
> From bd24ce298461ee8129befda1983acf1b37a7215a Mon Sep 17 00:00:00 2001
> From: Kyrylo Tkachov <ktkac...@nvidia.com>
> Date: Wed, 9 Jul 2025 10:04:01 -0700
> Subject: [PATCH] aarch64: PR target/120999: Avoid movprfx for NBSL
>  implementation of NOR
>
> While the SVE2 NBSL instruction accepts MOVPRFX to add more flexibility
> due to its tied operands, the destination of the movprfx cannot be also
> a source operand.  But the offending pattern in aarch64-sve2.md tries
> to do exactly that for the "=?&w,w,w" alternative and gas warns for the
> attached testcase.
>
> This patch just removes that alternative causing RA to emit a normal extra
> move.
> So for the testcase in the patch we now generate:
> nor_z:
>         nbsl    z1.d, z1.d, z2.d, z1.d
>         mov     z0.d, z1.d
>         ret
>
> instead of the previous:
> nor_z:
>         movprfx z0, z1
>         nbsl    z0.d, z0.d, z2.d, z0.d
>         ret
>
> which generated a gas warning.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
>
> gcc/
>
>       PR target/120999
>       * config/aarch64/aarch64-sve2.md (*aarch64_sve2_nor<mode>):
>       Remove movprfx alternative.
>
> gcc/testsuite/
>
>       PR target/120999
>       * gcc.target/aarch64/sve2/pr120999.c: New test.
> ---
>  gcc/config/aarch64/aarch64-sve2.md               | 11 ++++-------
>  gcc/testsuite/gcc.target/aarch64/sve2/pr120999.c | 15 +++++++++++++++
>  2 files changed, 19 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/pr120999.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve2.md 
> b/gcc/config/aarch64/aarch64-sve2.md
> index 15714712d3b..504ba6fc39b 100644
> --- a/gcc/config/aarch64/aarch64-sve2.md
> +++ b/gcc/config/aarch64/aarch64-sve2.md
> @@ -1616,20 +1616,17 @@
>  
>  ;; Use NBSL for vector NOR.
>  (define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
> -  [(set (match_operand:SVE_FULL_I 0 "register_operand")
> +  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
>       (unspec:SVE_FULL_I
>         [(match_operand 3)
>          (and:SVE_FULL_I
>            (not:SVE_FULL_I
> -            (match_operand:SVE_FULL_I 1 "register_operand"))
> +            (match_operand:SVE_FULL_I 1 "register_operand" "%0"))
>            (not:SVE_FULL_I
> -            (match_operand:SVE_FULL_I 2 "register_operand")))]
> +            (match_operand:SVE_FULL_I 2 "register_operand" "w")))]
>         UNSPEC_PRED_X))]
>    "TARGET_SVE2"
> -  {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
> -     [ w        , 0  , w ; *              ] nbsl\t%0.d, %0.d, %2.d, %0.d
> -     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;nbsl\t%0.d, 
> %0.d, %2.d, %0.d
> -  }
> +  "nbsl\t%0.d, %0.d, %2.d, %0.d"
>    "&& !CONSTANT_P (operands[3])"
>    {
>      operands[3] = CONSTM1_RTX (<VPRED>mode);
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr120999.c 
> b/gcc/testsuite/gcc.target/aarch64/sve2/pr120999.c
> new file mode 100644
> index 00000000000..1cdfa4107ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr120999.c
> @@ -0,0 +1,15 @@
> +/* PR target/120999.  */
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 --save-temps" } */
> +
> +/* We shouldn't be generating a MOVPRFX form of NBSL here or we'll get
> +   an assembler warning.  */
> +
> +#include <arm_sve.h>
> +
> +#define NOR(x, y)   (~((x) | (y)))
> +
> +svuint64_t nor_z(svuint64_t c, svuint64_t a, svuint64_t b) { return NOR(a, 
> b); }
> +
> +/* { dg-final { scan-assembler-not {\tmovprfx} } } */
> +

Reply via email to