Tamar Christina <[email protected]> writes:
> Hi All,
>
> As suggested in the ticket, this replaces the expansion that converted the
> Advanced SIMD types to SVE types with one that simply prints out an SVE
> register for these instructions.
>
> This fixes the subreg issues since there are no subregs involved anymore.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR target/109636
> * config/aarch64/aarch64-simd.md (<su_optab>div<mode>3,
> mulv2di3): Remove.
> * config/aarch64/iterators.md (VQDIV): Remove.
> (SVE_FULL_SDI_SIMD, SVE_FULL_SDI_SIMD_DI, SVE_FULL_HSDI_SIMD_DI,
> SVE_I_SIMD_DI): New.
> (VPRED, sve_lane_con): Add V4SI and V2DI.
> * config/aarch64/aarch64-sve.md (<optab><mode>3,
> @aarch64_pred_<optab><mode>): Support Advanced SIMD types.
> (mul<mode>3): New, split from <optab><mode>3.
> (@aarch64_pred_<optab><mode>, *post_ra_<optab><mode>3): New.
> * config/aarch64/aarch64-sve2.md (@aarch64_mul_lane_<mode>,
> *aarch64_mul_unpredicated_<mode>): Change SVE_FULL_HSDI to
> SVE_FULL_HSDI_SIMD_DI.
>
> gcc/testsuite/ChangeLog:
>
> PR target/109636
> * gcc.target/aarch64/sve/pr109636_1.c: New test.
> * gcc.target/aarch64/sve/pr109636_2.c: New test.
> * gcc.target/aarch64/sve2/pr109636_1.c: New test.
>
> --- inline copy of patch --
> [...]
> @@ -550,6 +559,13 @@ (define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI
> VNx4SI VNx2SI
> VNx2DI])
>
> +;; All SVE integer vector modes and Advanced SIMD 64-bit vector
> +;; element modes
> +(define_mode_iterator SVE_I_SIMD_DI [VNx16QI VNx8QI VNx4QI VNx2QI
> + VNx8HI VNx4HI VNx2HI
> + VNx4SI VNx2SI
> + VNx2DI V2DI])
> +
IMO this would be more robust as:
(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])
Your call on whether that's better or worse for the others.
OK with that change, thanks. I suppose at some point we should extend
the division patterns to V2SI, but that's clearly not stage 4 material.
Richard
> ;; SVE integer vector modes whose elements are 16 bits or wider.
> (define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
> VNx4SI VNx2SI
> @@ -2268,7 +2284,8 @@ (define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI
> "VNx8BI")
> (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
> (VNx32BF "VNx8BI")
> (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
> - (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
> + (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
> + (V4SI "VNx4BI") (V2DI "VNx2BI")])
>
> ;; ...and again in lower case.
> (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
> @@ -2370,6 +2387,7 @@ (define_mode_attr narrower_mask [(VNx8HI "0x81")
> (VNx4HI "0x41")
>
> ;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
> (define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
> + (V2DI "x")
> (VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
>
> ;; The constraint to use for an SVE FCMLA lane index.
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..5b37ddd2770bcbbec37b9563644da0ba061d3789
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
> @@ -0,0 +1,13 @@
> +/* { dg-additional-options "-O -mtune=a64fx" } */
> +
> +typedef unsigned long long __attribute__((__vector_size__ (16))) V;
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W;
> +
> +extern void bar (V v);
> +
> +void foo (V v, W w)
> +{
> + bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v));
> +}
> +
> +/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d,
> z[0-9]+.d} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
> b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..6d39dc8e590a04a486a300de10c5480d9c33afba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
> @@ -0,0 +1,13 @@
> +/* { dg-additional-options "-O -mcpu=a64fx" } */
> +
> +typedef unsigned long long __attribute__((__vector_size__ (16))) V;
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W;
> +
> +extern void bar (V v);
> +
> +void foom (V v, W w)
> +{
> + bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
> +}
> +
> +/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d,
> z[0-9]+.d} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
> b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..2bea18ad703cb3e1a1ce896bcedc2530e831a192
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
> @@ -0,0 +1,13 @@
> +/* { dg-additional-options "-O -mtune=a64fx" } */
> +
> +typedef unsigned long long __attribute__((__vector_size__ (16))) V;
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W;
> +
> +extern void bar (V v);
> +
> +void foom (V v, W w)
> +{
> + bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
> +}
> +
> +/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */