Tamar Christina <tamar.christ...@arm.com> writes:
> Hi All,
>
> As suggested in the ticket this replaces the expansion by converting the
> Advanced SIMD types to SVE types by simply printing out an SVE register for
> these instructions.
>
> This fixes the subreg issues since there are no subregs involved anymore.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       PR target/109636
>       * config/aarch64/aarch64-simd.md (<su_optab>div<mode>3,
>       mulv2di3): Remove.
>       * config/aarch64/iterators.md (VQDIV): Remove.
>       (SVE_FULL_SDI_SIMD, SVE_FULL_SDI_SIMD_DI, SVE_FULL_HSDI_SIMD_DI,
>       SVE_I_SIMD_DI): New.
>       (VPRED, sve_lane_con): Add V4SI and V2DI.
>       * config/aarch64/aarch64-sve.md (<optab><mode>3,
>       @aarch64_pred_<optab><mode>): Support Advanced SIMD types.
>       (mul<mode>3): New, split from <optab><mode>3.
>       (@aarch64_pred_<optab><mode>, *post_ra_<optab><mode>3): New.
>       * config/aarch64/aarch64-sve2.md (@aarch64_mul_lane_<mode>,
>       *aarch64_mul_unpredicated_<mode>): Change SVE_FULL_HSDI to
>       SVE_FULL_HSDI_SIMD_DI.
>
> gcc/testsuite/ChangeLog:
>
>       PR target/109636
>       * gcc.target/aarch64/sve/pr109636_1.c: New test.
>       * gcc.target/aarch64/sve/pr109636_2.c: New test.
>       * gcc.target/aarch64/sve2/pr109636_1.c: New test.
>
> --- inline copy of patch -- 
> [...]
> @@ -550,6 +559,13 @@ (define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI
>                            VNx4SI VNx2SI
>                            VNx2DI])
>  
> +;; All SVE integer vector modes and Advanced SIMD 64-bit vector
> +;; element modes
> +(define_mode_iterator SVE_I_SIMD_DI [VNx16QI VNx8QI VNx4QI VNx2QI
> +                                  VNx8HI VNx4HI VNx2HI
> +                                  VNx4SI VNx2SI
> +                                  VNx2DI V2DI])
> +

IMO this would be more robust as:

(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])

Your call on whether that's better or worse for the others.

OK with that change, thanks.  I suppose at some point we should extend
the division patterns to V2SI, but that's clearly not stage 4 material.

Richard

>  ;; SVE integer vector modes whose elements are 16 bits or wider.
>  (define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
>                               VNx4SI VNx2SI
> @@ -2268,7 +2284,8 @@ (define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI")
>                        (VNx32HI "VNx8BI") (VNx32HF "VNx8BI")
>                        (VNx32BF "VNx8BI")
>                        (VNx16SI "VNx4BI") (VNx16SF "VNx4BI")
> -                      (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")])
> +                      (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
> +                      (V4SI "VNx4BI") (V2DI "VNx2BI")])
>  
>  ;; ...and again in lower case.
>  (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
> @@ -2370,6 +2387,7 @@ (define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41")
>  
>  ;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
>  (define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
> +                                                       (V2DI "x")
>                               (VNx8HF "y") (VNx4SF "y") (VNx2DF "x")])
>  
>  ;; The constraint to use for an SVE FCMLA lane index.
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..5b37ddd2770bcbbec37b9563644da0ba061d3789
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c
> @@ -0,0 +1,13 @@
> +/* { dg-additional-options "-O -mtune=a64fx" } */
> +
> +typedef unsigned long long __attribute__((__vector_size__ (16))) V;
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W;
> +
> +extern void bar (V v);
> +
> +void foo (V v, W w)
> +{
> +  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v));
> +}
> +
> +/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..6d39dc8e590a04a486a300de10c5480d9c33afba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c
> @@ -0,0 +1,13 @@
> +/* { dg-additional-options "-O -mcpu=a64fx" } */
> +
> +typedef unsigned long long __attribute__((__vector_size__ (16))) V;
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W;
> +
> +extern void bar (V v);
> +
> +void foom (V v, W w)
> +{
> +  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
> +}
> +
> +/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..2bea18ad703cb3e1a1ce896bcedc2530e831a192
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c
> @@ -0,0 +1,13 @@
> +/* { dg-additional-options "-O -mtune=a64fx" } */
> +
> +typedef unsigned long long __attribute__((__vector_size__ (16))) V;
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W;
> +
> +extern void bar (V v);
> +
> +void foom (V v, W w)
> +{
> +  bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v));
> +}
> +
> +/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */

Reply via email to