On Wed, Jul 9, 2025 at 7:07 AM Richard Sandiford <richard.sandif...@arm.com> wrote: > > TARGET_VECTORIZE_VEC_PERM_CONST has code to match the SVE2.1 > "hybrid VLA" DUPQ, EXTQ, UZPQ{1,2}, and ZIPQ{1,2} instructions. > This matching was conditional on !BYTES_BIG_ENDIAN. > > The ACLE code also lowered the associated SVE2.1 intrinsics into > suitable VEC_PERM_EXPRs. This lowering was not conditional on > !BYTES_BIG_ENDIAN. > > The mismatch led to lots of ICEs in the ACLE tests on big-endian > targets: we lowered to VEC_PERM_EXPRs that are not supported. > > I think the !BYTES_BIG_ENDIAN restriction was unnecessary. > SVE maps the first memory element to the least significant end of > the register for both endiannesses, so no endian correction or lane > number adjustment is necessary. > > This is in some ways a bit counterintuitive. ZIPQ1 is conceptually > "apply Advanced SIMD ZIP1 to each 128-bit block" and endianness does > matter when choosing between Advanced SIMD ZIP1 and ZIP2. For example, > the V4SI permute selector { 0, 4, 1, 5 } corresponds to ZIP1 for little- > endian and ZIP2 for big-endian. But the difference between the hybrid > VLA and Advanced SIMD permute selectors is a consequence of the > difference between the SVE and Advanced SIMD element orders. > > The same thing applies to ACLE intrinsics. The current lowering of > svzipq1 etc. is correct for both endiannesses. If ACLE code does: > > 2x svld1_s32 + svzipq1_s32 + svst1_s32 > > then the byte-for-byte result is the same for both endiannesses. > On big-endian targets, this is different from using the Advanced SIMD > sequence below for each 128-bit block: > > 2x LDR + ZIP1 + STR > > In contrast, the byte-for-byte result of: > > 2x svld1q_gather_s32 + svzipq1_s32 + svst1q_scatter_s32 > > depends on endianness, since the quadword gathers and scatters use > Advanced SIMD byte ordering for each 128-bit block. 
This gather/scatter > sequence behaves in the same way as the Advanced SIMD LDR+ZIP1+STR > sequence for both endiannesses. > > Programmers writing ACLE code have to be aware of this difference > if they want to support both endiannesses. > > The patch includes some new execution tests to verify the expansion > of the VEC_PERM_EXPRs. > > Tested on aarch64-linux-gnu and aarch64_be-elf. OK to install?
Ok. > > Richard > > > gcc/ > * doc/sourcebuild.texi (aarch64_sve2_hw, aarch64_sve2p1_hw): Document. > * config/aarch64/aarch64.cc (aarch64_evpc_hvla): Extend to > BYTES_BIG_ENDIAN. > > gcc/testsuite/ > * lib/target-supports.exp (check_effective_target_aarch64_sve2p1_hw): > New proc. > * gcc.target/aarch64/sve2/dupq_1.c: Extend to big-endian. Add > noipa attributes. > * gcc.target/aarch64/sve2/extq_1.c: Likewise. > * gcc.target/aarch64/sve2/uzpq_1.c: Likewise. > * gcc.target/aarch64/sve2/zipq_1.c: Likewise. > * gcc.target/aarch64/sve2/dupq_1_run.c: New test. > * gcc.target/aarch64/sve2/extq_1_run.c: Likewise. > * gcc.target/aarch64/sve2/uzpq_1_run.c: Likewise. > * gcc.target/aarch64/sve2/zipq_1_run.c: Likewise. > --- > gcc/config/aarch64/aarch64.cc | 1 - > gcc/doc/sourcebuild.texi | 6 ++ > .../gcc.target/aarch64/sve2/dupq_1.c | 26 +++--- > .../gcc.target/aarch64/sve2/dupq_1_run.c | 87 +++++++++++++++++++ > .../gcc.target/aarch64/sve2/extq_1.c | 2 +- > .../gcc.target/aarch64/sve2/extq_1_run.c | 73 ++++++++++++++++ > .../gcc.target/aarch64/sve2/uzpq_1.c | 2 +- > .../gcc.target/aarch64/sve2/uzpq_1_run.c | 78 +++++++++++++++++ > .../gcc.target/aarch64/sve2/zipq_1.c | 2 +- > .../gcc.target/aarch64/sve2/zipq_1_run.c | 78 +++++++++++++++++ > gcc/testsuite/lib/target-supports.exp | 17 ++++ > 11 files changed, 355 insertions(+), 17 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 7960b639f90..ce25f4f6f9f 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -26752,7 +26752,6 @@ aarch64_evpc_hvla (struct expand_vec_perm_d *d) > machine_mode vmode = d->vmode; > if (!TARGET_SVE2p1 > || !TARGET_NON_STREAMING > - 
|| BYTES_BIG_ENDIAN > || d->vec_flags != VEC_SVE_DATA > || GET_MODE_UNIT_BITSIZE (vmode) > 64) > return false; > diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi > index 6c5586e4b03..85fb810d96c 100644 > --- a/gcc/doc/sourcebuild.texi > +++ b/gcc/doc/sourcebuild.texi > @@ -2373,6 +2373,12 @@ whether it does so by default). > @itemx aarch64_sve1024_hw > @itemx aarch64_sve2048_hw > Like @code{aarch64_sve_hw}, but also test for an exact hardware vector > length. > +@item aarch64_sve2_hw > +AArch64 target that is able to generate and execute SVE2 code (regardless of > +whether it does so by default). > +@item aarch64_sve2p1_hw > +AArch64 target that is able to generate and execute SVE2.1 code (regardless > of > +whether it does so by default). > > @item aarch64_fjcvtzs_hw > AArch64 target that is able to generate and execute armv8.3-a FJCVTZS > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1.c > index 5472e30f812..9db60b1ea4f 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1.c > @@ -1,5 +1,5 @@ > /* { dg-options "-O2 -msve-vector-bits=256" } */ > -/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <arm_sve.h> > > @@ -15,7 +15,7 @@ typedef svuint64_t fixed_uint64_t > __attribute__((arm_sve_vector_bits(256))); > ** trn1 z0\.d, z0\.d, z0\.d > ** ret > */ > -fixed_uint64_t > +[[gnu::noipa]] fixed_uint64_t > f1 (fixed_uint64_t z0) > { > return __builtin_shufflevector (z0, z0, 0, 0, 2, 2); > @@ -26,7 +26,7 @@ f1 (fixed_uint64_t z0) > ** trn2 z0\.d, z0\.d, z0\.d > ** ret > */ > -fixed_uint64_t > +[[gnu::noipa]] fixed_uint64_t > f2 (fixed_uint64_t z0) > { > return __builtin_shufflevector (z0, z0, 1, 1, 3, 3); > @@ -37,7 +37,7 @@ f2 (fixed_uint64_t z0) > ** dupq z0\.s, z0\.s\[0\] > ** ret > */ > -fixed_int32_t > +[[gnu::noipa]] fixed_int32_t > 
f3 (fixed_int32_t z0) > { > return __builtin_shufflevector (z0, z0, 0, 0, 0, 0, 4, 4, 4, 4); > @@ -48,7 +48,7 @@ f3 (fixed_int32_t z0) > ** dupq z0\.s, z0\.s\[1\] > ** ret > */ > -fixed_int32_t > +[[gnu::noipa]] fixed_int32_t > f4 (fixed_int32_t z0) > { > return __builtin_shufflevector (z0, z0, 1, 1, 1, 1, 5, 5, 5, 5); > @@ -59,7 +59,7 @@ f4 (fixed_int32_t z0) > ** dupq z0\.s, z0\.s\[2\] > ** ret > */ > -fixed_int32_t > +[[gnu::noipa]] fixed_int32_t > f5 (fixed_int32_t z0) > { > return __builtin_shufflevector (z0, z0, 2, 2, 2, 2, 6, 6, 6, 6); > @@ -70,7 +70,7 @@ f5 (fixed_int32_t z0) > ** dupq z0\.s, z0\.s\[3\] > ** ret > */ > -fixed_int32_t > +[[gnu::noipa]] fixed_int32_t > f6 (fixed_int32_t z0) > { > return __builtin_shufflevector (z0, z0, 3, 3, 3, 3, 7, 7, 7, 7); > @@ -81,7 +81,7 @@ f6 (fixed_int32_t z0) > ** dupq z0\.h, z0\.h\[0\] > ** ret > */ > -fixed_uint16_t > +[[gnu::noipa]] fixed_uint16_t > f7 (fixed_uint16_t z0) > { > return __builtin_shufflevector (z0, z0, > @@ -95,7 +95,7 @@ f7 (fixed_uint16_t z0) > ** dupq z0\.h, z0\.h\[5\] > ** ret > */ > -fixed_uint16_t > +[[gnu::noipa]] fixed_uint16_t > f8 (fixed_uint16_t z0) > { > return __builtin_shufflevector (z0, z0, > @@ -108,7 +108,7 @@ f8 (fixed_uint16_t z0) > ** dupq z0\.h, z0\.h\[7\] > ** ret > */ > -fixed_uint16_t > +[[gnu::noipa]] fixed_uint16_t > f9 (fixed_uint16_t z0) > { > return __builtin_shufflevector (z0, z0, > @@ -121,7 +121,7 @@ f9 (fixed_uint16_t z0) > ** dupq z0\.b, z0\.b\[0\] > ** ret > */ > -fixed_uint8_t > +[[gnu::noipa]] fixed_uint8_t > f10 (fixed_uint8_t z0) > { > return __builtin_shufflevector (z0, z0, > @@ -136,7 +136,7 @@ f10 (fixed_uint8_t z0) > ** dupq z0\.b, z0\.b\[13\] > ** ret > */ > -fixed_uint8_t > +[[gnu::noipa]] fixed_uint8_t > f11 (fixed_uint8_t z0) > { > return __builtin_shufflevector (z0, z0, > @@ -151,7 +151,7 @@ f11 (fixed_uint8_t z0) > ** dupq z0\.b, z0\.b\[15\] > ** ret > */ > -fixed_uint8_t > +[[gnu::noipa]] fixed_uint8_t > f12 (fixed_uint8_t z0) > { > return 
__builtin_shufflevector (z0, z0, > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c > b/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c > new file mode 100644 > index 00000000000..fd25034c4b4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/dupq_1_run.c > @@ -0,0 +1,87 @@ > +/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */ > +/* { dg-options "-O2 -msve-vector-bits=256" } */ > + > +#include "dupq_1.c" > + > +#define TEST(A, B) \ > + do { \ > + typeof(B) actual_ = (A); \ > + if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0) \ > + __builtin_abort (); \ > + } while (0) > + > +int > +main () > +{ > + fixed_uint64_t a64 = { 0x1122, -1, 0x5566, -2 }; > + fixed_int32_t a32 = { 0x1122, -0x3344, 0x5566, -0x7788, > + 0x99aa, -0xbbcc, 0xddee, -0xff00 }; > + fixed_uint16_t a16 = { 0x9a12, 0xbc34, 0xde56, 0xf078, > + 0x00ff, 0x11ee, 0x22dd, 0x33cc, > + 0x44bb, 0x55aa, 0x6699, 0x7788, > + 0xfe01, 0xdc23, 0xba45, 0x9867 }; > + fixed_uint8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70, > + 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8, > + 0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f, > + 0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 }; > + > + fixed_uint64_t expected1 = { 0x1122, 0x1122, 0x5566, 0x5566 }; > + TEST (f1 (a64), expected1); > + > + fixed_uint64_t expected2 = { -1, -1, -2, -2 }; > + TEST (f2 (a64), expected2); > + > + fixed_int32_t expected3 = { 0x1122, 0x1122, 0x1122, 0x1122, > + 0x99aa, 0x99aa, 0x99aa, 0x99aa }; > + TEST (f3 (a32), expected3); > + > + fixed_int32_t expected4 = { -0x3344, -0x3344, -0x3344, -0x3344, > + -0xbbcc, -0xbbcc, -0xbbcc, -0xbbcc }; > + TEST (f4 (a32), expected4); > + > + fixed_int32_t expected5 = { 0x5566, 0x5566, 0x5566, 0x5566, > + 0xddee, 0xddee, 0xddee, 0xddee }; > + TEST (f5 (a32), expected5); > + > + fixed_int32_t expected6 = { -0x7788, -0x7788, -0x7788, -0x7788, > + -0xff00, -0xff00, -0xff00, -0xff00 }; > + TEST (f6 (a32), expected6); > + > + 
fixed_uint16_t expected7 = { 0x9a12, 0x9a12, 0x9a12, 0x9a12, > + 0x9a12, 0x9a12, 0x9a12, 0x9a12, > + 0x44bb, 0x44bb, 0x44bb, 0x44bb, > + 0x44bb, 0x44bb, 0x44bb, 0x44bb }; > + TEST (f7 (a16), expected7); > + > + fixed_uint16_t expected8 = { 0x11ee, 0x11ee, 0x11ee, 0x11ee, > + 0x11ee, 0x11ee, 0x11ee, 0x11ee, > + 0xdc23, 0xdc23, 0xdc23, 0xdc23, > + 0xdc23, 0xdc23, 0xdc23, 0xdc23 }; > + TEST (f8 (a16), expected8); > + > + fixed_uint16_t expected9 = { 0x33cc, 0x33cc, 0x33cc, 0x33cc, > + 0x33cc, 0x33cc, 0x33cc, 0x33cc, > + 0x9867, 0x9867, 0x9867, 0x9867, > + 0x9867, 0x9867, 0x9867, 0x9867 }; > + TEST (f9 (a16), expected9); > + > + fixed_uint8_t expected10 = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, > 0x01, > + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, > + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, > + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe > }; > + TEST (f10 (a8), expected10); > + > + fixed_uint8_t expected11 = { 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, > 0xde, > + 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, > + 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, > + 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21 > }; > + TEST (f11 (a8), expected11); > + > + fixed_uint8_t expected12 = { 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, > 0xf8, > + 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, > + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, > + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 > }; > + TEST (f12 (a8), expected12); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/extq_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/extq_1.c > index 03c5fb143f7..b8ce89c9576 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve2/extq_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/extq_1.c > @@ -1,5 +1,5 @@ > /* { dg-options "-O2 -msve-vector-bits=256" } */ > -/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <arm_sve.h> > > diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c > b/gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c > new file mode 100644 > index 00000000000..6b72c98a22c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/extq_1_run.c > @@ -0,0 +1,73 @@ > +/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */ > +/* { dg-options "-O2 -msve-vector-bits=256" } */ > + > +#include "extq_1.c" > + > +#define TEST(A, B) \ > + do { \ > + typeof(B) actual_ = (A); \ > + if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0) \ > + __builtin_abort (); \ > + } while (0) > + > +int > +main () > +{ > + fixed_float64_t a64 = { 1.5, 3.75, -5.25, 9 }; > + fixed_float64_t b64 = { -2, 4.125, -6.375, 11.5 }; > + fixed_float64_t expected1 = { 3.75, -2, 9, -6.375 }; > + TEST (f1 (a64, b64), expected1); > + > + fixed_uint32_t a32 = { 0x1122, -0x3344, 0x5566, -0x7788, > + 0x99aa, -0xbbcc, 0xddee, -0xff00 }; > + fixed_uint32_t b32 = { 1 << 20, 1 << 21, 1 << 22, 1 << 23, > + 5 << 6, 5 << 7, 5 << 8, 5 << 9 }; > + fixed_uint32_t expected2 = { -0x3344, 0x5566, -0x7788, 1 << 20, > + -0xbbcc, 0xddee, -0xff00, 5 << 6 }; > + fixed_uint32_t expected3 = { -0x7788, 1 << 20, 1 << 21, 1 << 22, > + -0xff00, 5 << 6, 5 << 7, 5 << 8 }; > + TEST (f2 (a32, b32), expected2); > + TEST (f3 (a32, b32), expected3); > + > + fixed_float16_t a16 = { 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, > + 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 4.25 }; > + fixed_float16_t b16 = { -0.5, -0.75, -1, -1.25, -1.5, -1.75, -2, -2.25, > + -2.5, -2.75, -3, -3.25, -3.5, -3.75, -4, -4.25 }; > + fixed_float16_t expected4 = { 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, -0.5, > + 2.75, 3, 3.25, 3.5, 3.75, 4, 4.25, -2.5 }; > + fixed_float16_t expected5 = { 1.75, 2, 2.25, -0.5, -0.75, -1, -1.25, -1.5, > + 3.75, 4, 4.25, -2.5, -2.75, -3, -3.25, -3.5 }; > + fixed_float16_t expected6 = { 2.25, -0.5, -0.75, -1, > + -1.25, -1.5, -1.75, -2, > + 4.25, -2.5, -2.75, -3, > + -3.25, -3.5, -3.75, -4 }; > + TEST (f4 (a16, b16), expected4); > + 
TEST (f5 (a16, b16), expected5); > + TEST (f6 (a16, b16), expected6); > + > + fixed_int8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70, > + 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8, > + 0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f, > + 0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 }; > + fixed_int8_t b8 = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, > + 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, > + 0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, 0x8a, > + 0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1, 0x02 }; > + fixed_int8_t expected7 = { 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70, 0x89, > + 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8, 0x11, > + 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f, 0x76, > + 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07, 0x13 }; > + fixed_int8_t expected8 = { 0xbc, 0xcd, 0xde, 0xef, 0xf8, 0x11, 0x22, 0x33, > + 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, > + 0x43, 0x32, 0x21, 0x10, 0x07, 0x13, 0x24, 0x35, > + 0x46, 0x57, 0x68, 0x79, 0x8a, 0x9b, 0xac, 0xbd }; > + fixed_int8_t expected9 = { 0xf8, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, > + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, > + 0x07, 0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, > + 0x8a, 0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1 }; > + TEST (f7 (a8, b8), expected7); > + TEST (f8 (a8, b8), expected8); > + TEST (f9 (a8, b8), expected9); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1.c > index f923e9447ec..47a663e94c2 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1.c > @@ -1,5 +1,5 @@ > /* { dg-options "-O2 -msve-vector-bits=256" } */ > -/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <arm_sve.h> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c > b/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c > new file 
mode 100644 > index 00000000000..9044cae659b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/uzpq_1_run.c > @@ -0,0 +1,78 @@ > +/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */ > +/* { dg-options "-O2 -msve-vector-bits=256" } */ > + > +#include "uzpq_1.c" > + > +typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(256))); > + > +#define TEST(A, B) \ > + do { \ > + typeof(A) actual_ = (A); \ > + if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0) \ > + __builtin_abort (); \ > + } while (0) > + > +int > +main () > +{ > + fixed_int64_t a64 = { 0x1122LL << 31, -1LL << 47, 0x5566 << 15, -2 }; > + fixed_int64_t b64 = { 42, -0x3344LL << 19, 303, -0x7788LL << 27 }; > + fixed_int64_t expected1 = { 0x1122LL << 31, 42, > + 0x5566 << 15, 303 }; > + fixed_int64_t expected2 = { -1LL << 47, -0x3344LL << 19, > + -2, -0x7788LL << 27 }; > + TEST (f1 (a64, b64), expected1); > + TEST (f2 (a64, b64), expected2); > + > + fixed_float32_t a32 = { 0.5, 0.75, 1, 1.25, 2.5, 2.75, 3, 3.25 }; > + fixed_float32_t b32 = { -0.5, -0.75, -1, -1.25, -2.5, -2.75, -3, -3.25 }; > + fixed_float32_t expected3 = { 0.5, 1, -0.5, -1, > + 2.5, 3, -2.5, -3 }; > + fixed_float32_t expected4 = { 0.75, 1.25, -0.75, -1.25, > + 2.75, 3.25, -2.75, -3.25 }; > + TEST (f3 (a32, b32), expected3); > + TEST (f4 (a32, b32), expected4); > + > + fixed_uint16_t a16_i = { 0x9a12, 0xbc34, 0xde56, 0xf078, > + 0x00ff, 0x11ee, 0x22dd, 0x33cc, > + 0x44bb, 0x55aa, 0x6699, 0x7788, > + 0xfe01, 0xdc23, 0xba45, 0x9867 }; > + fixed_uint16_t b16_i = { 0x1010, 0x2020, 0x3030, 0x4040, > + 0x5050, 0x6060, 0x7070, 0x8080, > + 0x9090, 0xa0a0, 0xb0b0, 0xc0c0, > + 0xd0d0, 0xe0e0, 0xf0f0, 0x0f0f }; > + fixed_uint16_t expected5 = { 0x9a12, 0xde56, 0x00ff, 0x22dd, > + 0x1010, 0x3030, 0x5050, 0x7070, > + 0x44bb, 0x6699, 0xfe01, 0xba45, > + 0x9090, 0xb0b0, 0xd0d0, 0xf0f0 }; > + fixed_uint16_t expected6 = { 0xbc34, 0xf078, 0x11ee, 0x33cc, > + 0x2020, 0x4040, 0x6060, 0x8080, > + 0x55aa, 
0x7788, 0xdc23, 0x9867, > + 0xa0a0, 0xc0c0, 0xe0e0, 0x0f0f }; > + fixed_bfloat16_t a16, b16; > + __builtin_memcpy (&a16, &a16_i, sizeof (a16)); > + __builtin_memcpy (&b16, &b16_i, sizeof (b16)); > + TEST (f5 (a16, b16), expected5); > + TEST (f6 (a16, b16), expected6); > + > + fixed_uint8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70, > + 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8, > + 0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f, > + 0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 }; > + fixed_uint8_t b8 = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, > + 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, > + 0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, 0x8a, > + 0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1, 0x02 }; > + fixed_uint8_t expected7 = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, > + 0x11, 0x33, 0x55, 0x77, 0x99, 0xbb, 0xdd, 0xff, > + 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, > + 0x13, 0x35, 0x57, 0x79, 0x9b, 0xbd, 0xdf, 0xf1 > }; > + fixed_uint8_t expected8 = { 0x12, 0x34, 0x56, 0x70, 0x9a, 0xbc, 0xde, 0xf8, > + 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0x00, > + 0xed, 0xcb, 0xa9, 0x8f, 0x65, 0x43, 0x21, 0x07, > + 0x24, 0x46, 0x68, 0x8a, 0xac, 0xce, 0xe0, 0x02 > }; > + TEST (f7 (a8, b8), expected7); > + TEST (f8 (a8, b8), expected8); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1.c > index fa420a959c7..482f2e8b829 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1.c > @@ -1,5 +1,5 @@ > /* { dg-options "-O2 -msve-vector-bits=256" } */ > -/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <arm_sve.h> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c > b/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c > new file mode 100644 > index 00000000000..211f9d945ed > --- /dev/null > 
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/zipq_1_run.c > @@ -0,0 +1,78 @@ > +/* { dg-do run { target { aarch64_sve256_hw && aarch64_sve2p1_hw } } } */ > +/* { dg-options "-O2 -msve-vector-bits=256" } */ > + > +#include "zipq_1.c" > + > +typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(256))); > + > +#define TEST(A, B) \ > + do { \ > + typeof(A) actual_ = (A); \ > + if (__builtin_memcmp (&actual_, &(B), sizeof (actual_)) != 0) \ > + __builtin_abort (); \ > + } while (0) > + > +int > +main () > +{ > + fixed_int64_t a64 = { 0x1122LL << 31, -1LL << 47, 0x5566 << 15, -2 }; > + fixed_int64_t b64 = { 42, -0x3344LL << 19, 303, -0x7788LL << 27 }; > + fixed_int64_t expected1 = { 0x1122LL << 31, 42, > + 0x5566 << 15, 303 }; > + fixed_int64_t expected2 = { -1LL << 47, -0x3344LL << 19, > + -2, -0x7788LL << 27 }; > + TEST (f1 (a64, b64), expected1); > + TEST (f2 (a64, b64), expected2); > + > + fixed_float32_t a32 = { 0.5, 0.75, 1, 1.25, 2.5, 2.75, 3, 3.25 }; > + fixed_float32_t b32 = { -0.5, -0.75, -1, -1.25, -2.5, -2.75, -3, -3.25 }; > + fixed_float32_t expected3 = { 0.5, -0.5, 0.75, -0.75, > + 2.5, -2.5, 2.75, -2.75 }; > + fixed_float32_t expected4 = { 1, -1, 1.25, -1.25, > + 3, -3, 3.25, -3.25 }; > + TEST (f3 (a32, b32), expected3); > + TEST (f4 (a32, b32), expected4); > + > + fixed_uint16_t a16_i = { 0x9a12, 0xbc34, 0xde56, 0xf078, > + 0x00ff, 0x11ee, 0x22dd, 0x33cc, > + 0x44bb, 0x55aa, 0x6699, 0x7788, > + 0xfe01, 0xdc23, 0xba45, 0x9867 }; > + fixed_uint16_t b16_i = { 0x1010, 0x2020, 0x3030, 0x4040, > + 0x5050, 0x6060, 0x7070, 0x8080, > + 0x9090, 0xa0a0, 0xb0b0, 0xc0c0, > + 0xd0d0, 0xe0e0, 0xf0f0, 0x0f0f }; > + fixed_uint16_t expected5 = { 0x9a12, 0x1010, 0xbc34, 0x2020, > + 0xde56, 0x3030, 0xf078, 0x4040, > + 0x44bb, 0x9090, 0x55aa, 0xa0a0, > + 0x6699, 0xb0b0, 0x7788, 0xc0c0 }; > + fixed_uint16_t expected6 = { 0x00ff, 0x5050, 0x11ee, 0x6060, > + 0x22dd, 0x7070, 0x33cc, 0x8080, > + 0xfe01, 0xd0d0, 0xdc23, 0xe0e0, > + 0xba45, 0xf0f0, 0x9867, 0x0f0f }; > 
+ fixed_bfloat16_t a16, b16; > + __builtin_memcpy (&a16, &a16_i, sizeof (a16)); > + __builtin_memcpy (&b16, &b16_i, sizeof (b16)); > + TEST (f5 (a16, b16), expected5); > + TEST (f6 (a16, b16), expected6); > + > + fixed_uint8_t a8 = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x70, > + 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf8, > + 0xfe, 0xed, 0xdc, 0xcb, 0xba, 0xa9, 0x98, 0x8f, > + 0x76, 0x65, 0x54, 0x43, 0x32, 0x21, 0x10, 0x07 }; > + fixed_uint8_t b8 = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, > + 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, > + 0x13, 0x24, 0x35, 0x46, 0x57, 0x68, 0x79, 0x8a, > + 0x9b, 0xac, 0xbd, 0xce, 0xdf, 0xe0, 0xf1, 0x02 }; > + fixed_uint8_t expected7 = { 0x01, 0x11, 0x12, 0x22, 0x23, 0x33, 0x34, 0x44, > + 0x45, 0x55, 0x56, 0x66, 0x67, 0x77, 0x70, 0x88, > + 0xfe, 0x13, 0xed, 0x24, 0xdc, 0x35, 0xcb, 0x46, > + 0xba, 0x57, 0xa9, 0x68, 0x98, 0x79, 0x8f, 0x8a > }; > + fixed_uint8_t expected8 = { 0x89, 0x99, 0x9a, 0xaa, 0xab, 0xbb, 0xbc, 0xcc, > + 0xcd, 0xdd, 0xde, 0xee, 0xef, 0xff, 0xf8, 0x00, > + 0x76, 0x9b, 0x65, 0xac, 0x54, 0xbd, 0x43, 0xce, > + 0x32, 0xdf, 0x21, 0xe0, 0x10, 0xf1, 0x07, 0x02 > }; > + TEST (f7 (a8, b8), expected7); > + TEST (f8 (a8, b8), expected8); > + > + return 0; > +} > diff --git a/gcc/testsuite/lib/target-supports.exp > b/gcc/testsuite/lib/target-supports.exp > index 956bc0bc7ca..9ab46a0eab4 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -6491,6 +6491,23 @@ proc check_effective_target_aarch64_sve2_hw { } { > }] > } > > +# Return true if this is an AArch64 target that can run SVE2.1 code. 
> + > +proc check_effective_target_aarch64_sve2p1_hw { } { > + if { ![istarget aarch64*-*-*] } { > + return 0 > + } > + return [check_runtime aarch64_sve2p1_hw_available { > + #pragma GCC target "+sve2p1" > + int > + main (void) > + { > + asm volatile ("dupq z0.b, z0.b[0]"); > + return 0; > + } > + }] > +} > + > # Return true if this is an AArch64 target that can run SVE code and > # if its SVE vectors have exactly BITS bits. > > -- > 2.43.0 >