Tamar Christina <tamar.christ...@arm.com> writes: > Hi All, > > As suggested in the ticket this replaces the expansion by converting the > Advanced SIMD types to SVE types by simply printing out an SVE register for > these instructions. > > This fixes the subreg issues since there are no subregs involved anymore. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR target/109636 > * config/aarch64/aarch64-simd.md (<su_optab>div<mode>3, > mulv2di3): Remove. > * config/aarch64/iterators.md (VQDIV): Remove. > (SVE_FULL_SDI_SIMD, SVE_FULL_SDI_SIMD_DI, SVE_FULL_HSDI_SIMD_DI, > SVE_I_SIMD_DI): New. > (VPRED, sve_lane_con): Add V4SI and V2DI. > * config/aarch64/aarch64-sve.md (<optab><mode>3, > @aarch64_pred_<optab><mode>): Support Advanced SIMD types. > (mul<mode>3): New, split from <optab><mode>3. > (@aarch64_pred_<optab><mode>, *post_ra_<optab><mode>3): New. > * config/aarch64/aarch64-sve2.md (@aarch64_mul_lane_<mode>, > *aarch64_mul_unpredicated_<mode>): Change SVE_FULL_HSDI to > SVE_FULL_HSDI_SIMD_DI. > > gcc/testsuite/ChangeLog: > > PR target/109636 > * gcc.target/aarch64/sve/pr109636_1.c: New test. > * gcc.target/aarch64/sve/pr109636_2.c: New test. > * gcc.target/aarch64/sve2/pr109636_1.c: New test. > > --- inline copy of patch -- > [...] > @@ -550,6 +559,13 @@ (define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI > VNx4SI VNx2SI > VNx2DI]) > > +;; All SVE integer vector modes and Advanced SIMD 64-bit vector > +;; element modes > +(define_mode_iterator SVE_I_SIMD_DI [VNx16QI VNx8QI VNx4QI VNx2QI > + VNx8HI VNx4HI VNx2HI > + VNx4SI VNx2SI > + VNx2DI V2DI]) > +
IMO this would be more robust as: (define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI]) Your call on whether that's better or worse for the others. OK with that change, thanks. I suppose at some point we should extend the division patterns to V2SI, but that's clearly not stage 4 material. Richard > ;; SVE integer vector modes whose elements are 16 bits or wider. > (define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI > VNx4SI VNx2SI > @@ -2268,7 +2284,8 @@ (define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI > "VNx8BI") > (VNx32HI "VNx8BI") (VNx32HF "VNx8BI") > (VNx32BF "VNx8BI") > (VNx16SI "VNx4BI") (VNx16SF "VNx4BI") > - (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")]) > + (VNx8DI "VNx2BI") (VNx8DF "VNx2BI") > + (V4SI "VNx4BI") (V2DI "VNx2BI")]) > > ;; ...and again in lower case. > (define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi") > @@ -2370,6 +2387,7 @@ (define_mode_attr narrower_mask [(VNx8HI "0x81") > (VNx4HI "0x41") > > ;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index. > (define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x") > + (V2DI "x") > (VNx8HF "y") (VNx4SF "y") (VNx2DF "x")]) > > ;; The constraint to use for an SVE FCMLA lane index. 
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..5b37ddd2770bcbbec37b9563644da0ba061d3789 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_1.c > @@ -0,0 +1,13 @@ > +/* { dg-additional-options "-O -mtune=a64fx" } */ > + > +typedef unsigned long long __attribute__((__vector_size__ (16))) V; > +typedef unsigned long long __attribute__((__vector_size__ (32))) W; > + > +extern void bar (V v); > + > +void foo (V v, W w) > +{ > + bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) / v)); > +} > + > +/* { dg-final { scan-assembler {udiv\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, > z[0-9]+.d} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c > b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..6d39dc8e590a04a486a300de10c5480d9c33afba > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr109636_2.c > @@ -0,0 +1,13 @@ > +/* { dg-additional-options "-O -mcpu=a64fx" } */ > + > +typedef unsigned long long __attribute__((__vector_size__ (16))) V; > +typedef unsigned long long __attribute__((__vector_size__ (32))) W; > + > +extern void bar (V v); > + > +void foom (V v, W w) > +{ > + bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v)); > +} > + > +/* { dg-final { scan-assembler {mul\tz[0-9]+.d, p[0-9]+/m, z[0-9]+.d, > z[0-9]+.d} } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..2bea18ad703cb3e1a1ce896bcedc2530e831a192 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr109636_1.c > @@ -0,0 +1,13 @@ > +/* { dg-additional-options "-O -mtune=a64fx" } */ > + > +typedef unsigned long long __attribute__((__vector_size__ (16))) V; 
> +typedef unsigned long long __attribute__((__vector_size__ (32))) W; > + > +extern void bar (V v); > + > +void foom (V v, W w) > +{ > + bar (__builtin_shuffle (v, __builtin_shufflevector ((V){}, w, 4, 5) * v)); > +} > + > +/* { dg-final { scan-assembler {mul\tz[0-9]+.d, z[0-9]+.d, z[0-9]+.d} } } */