This patch adds more tests for vector permutes that can now be optimized to FMOV with the generic PERM change and the aarch64 AND patch.
Changes since v1: * v2: Add -mlittle-endian to the little endian tests explicitly and rename the tests accordingly. PR target/100165 gcc/testsuite/ChangeLog: * gcc.target/aarch64/fmov-3-be.c: New test. * gcc.target/aarch64/fmov-3-le.c: New test. * gcc.target/aarch64/fmov-4-be.c: New test. * gcc.target/aarch64/fmov-4-le.c: New test. * gcc.target/aarch64/fmov-5-be.c: New test. * gcc.target/aarch64/fmov-5-le.c: New test. Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com> --- gcc/testsuite/gcc.target/aarch64/fmov-3-be.c | 75 ++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-3-le.c | 130 ++++++++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-4-be.c | 54 +++++++ gcc/testsuite/gcc.target/aarch64/fmov-4-le.c | 94 ++++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-5-be.c | 150 +++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/fmov-5-le.c | 150 +++++++++++++++++++ 6 files changed, 653 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-3-be.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-3-le.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-4-be.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-4-le.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-5-be.c create mode 100644 gcc/testsuite/gcc.target/aarch64/fmov-5-le.c diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-3-be.c b/gcc/testsuite/gcc.target/aarch64/fmov-3-be.c new file mode 100644 index 00000000000..eff013a6722 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-3-be.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbig-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef short v4hi __attribute__ ((vector_size (8))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov s0, s0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 
}, (v4hi){ 4, 5, 2, 3 }); +} + +/* +** f_v8hi: +** fmov s0, s0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 8, 9, 10, 11, 12, 13, 6, 7 }); +} + +/* +** f_v4si: +** fmov d0, d0 +** ret +*/ +v4si +f_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 6, 7, 2, 3 }); +} + +/* +** g_v4si: +** fmov d0, d0 +** ret +*/ +v4si +g_v4si (v4si x) +{ + return __builtin_shuffle ((v4si){ 0, 0, 0, 0 }, x, (v4si){ 2, 3, 6, 7 }); +} + +/* +** h_v4si: +** fmov s0, s0 +** ret +*/ +v4si +h_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 4, 5, 6, 3 }); +} + +/* +** f_v4sf: +** fmov d0, d0 +** ret +*/ +v4sf +f_v4sf (v4sf x) +{ + return __builtin_shuffle (x, (v4sf){ 0, 0, 0, 0 }, (v4si){ 6, 7, 2, 3 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-3-le.c b/gcc/testsuite/gcc.target/aarch64/fmov-3-le.c new file mode 100644 index 00000000000..adbf87243f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-3-le.c @@ -0,0 +1,130 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlittle-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +typedef short v4hi __attribute__ ((vector_size (8))); +typedef char v8qi __attribute__ ((vector_size (8))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef char v16qi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov s0, s0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 0, 1, 4, 5 }); +} + +/* +** g_v4hi: +** uzp1 v([0-9]+).2d, v0.2d, v0.2d +** adrp x([0-9]+), .LC0 +** ldr d([0-9]+), \[x\2, #:lo12:.LC0\] +** tbl v0.8b, {v\1.16b}, v\3.8b +** ret +*/ +v4hi +g_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 3, 1, 4, 2 }); +} + +/* +** f_v8hi: +** fmov s0, s0 +** ret +*/ +v8hi +f_v8hi (v8hi 
x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} + +/* +** f_v4si: +** fmov d0, d0 +** ret +*/ +v4si +f_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 0, 1, 4, 5 }); +} + +/* +** g_v4si: +** fmov d0, d0 +** ret +*/ +v4si +g_v4si (v4si x) +{ + return __builtin_shuffle ((v4si){ 0, 0, 0, 0 }, x, (v4si){ 4, 5, 2, 3 }); +} + +/* +** h_v4si: +** fmov s0, s0 +** ret +*/ +v4si +h_v4si (v4si x) +{ + return __builtin_shuffle (x, (v4si){ 0, 0, 0, 0 }, (v4si){ 0, 4, 5, 6 }); +} + +/* +** f_v4sf: +** fmov d0, d0 +** ret +*/ +v4sf +f_v4sf (v4sf x) +{ + return __builtin_shuffle (x, (v4sf){ 0, 0, 0, 0 }, (v4si){ 0, 1, 6, 7 }); +} + +/* +** f_v8qi: +** fmov s0, s0 +** ret +*/ +v8qi +f_v8qi (v8qi x) +{ + return __builtin_shuffle (x, (v8qi){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8qi){ 0, 1, 2, 3, 10, 11, 12, 13 }); +} + +/* +** f_v16qi: +** fmov d0, d0 +** ret +*/ +v16qi +f_v16qi (v16qi x) +{ + return __builtin_shuffle ( + x, (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 }); +} + +/* +** g_v16qi: +** fmov s0, s0 +** ret +*/ +v16qi +g_v16qi (v16qi x) +{ + return __builtin_shuffle ( + x, (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + (v16qi){ 0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 }); +} + diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-4-be.c b/gcc/testsuite/gcc.target/aarch64/fmov-4-be.c new file mode 100644 index 00000000000..58212ef435e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-4-be.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbig-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return 
__builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 6, 3 }); +} + +/* +** g_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +g_v4hi (v4hi x) +{ + return __builtin_shuffle ((v4hi){ 0, 0, 0, 0 }, x, (v4hi){ 0, 1, 2, 7 }); +} + +/* +** f_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 8, 9, 10, 11, 12, 13, 14, 7 }); +} + +/* +** g_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +g_v8hi (v8hi x) +{ + return __builtin_shuffle ((v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, + (v8hi){ 0, 1, 2, 3, 4, 5, 6, 15 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-4-le.c b/gcc/testsuite/gcc.target/aarch64/fmov-4-le.c new file mode 100644 index 00000000000..3449a516425 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-4-le.c @@ -0,0 +1,94 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlittle-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef short v4hi __attribute__ ((vector_size (8))); +typedef char v8qi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef char v16qi __attribute__ ((vector_size (16))); + +/* +** f_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +f_v4hi (v4hi x) +{ + return __builtin_shuffle (x, (v4hi){ 0, 0, 0, 0 }, (v4hi){ 0, 4, 5, 6 }); +} + +/* +** g_v4hi: +** fmov h0, h0 +** ret +*/ +v4hi +g_v4hi (v4hi x) +{ + return __builtin_shuffle ((v4hi){ 0, 0, 0, 0 }, x, (v4hi){ 4, 0, 1, 2 }); +} + +/* +** f_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +f_v8hi (v8hi x) +{ + return __builtin_shuffle (x, (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 0, 8, 9, 10, 11, 12, 13, 14 }); +} + +/* +** g_v8hi: +** fmov h0, h0 +** ret +*/ +v8hi +g_v8hi (v8hi x) +{ + return __builtin_shuffle ((v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, + (v8hi){ 8, 0, 1, 2, 3, 4, 5, 6 }); +} + +/* +** f_v8qi: +** fmov h0, h0 +** ret +*/ +v8qi +f_v8qi (v8qi x) +{ + return __builtin_shuffle (x, (v8qi){ 0, 0, 0, 0, 
0, 0, 0, 0 }, + (v8qi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} + + +/* +** g_v8qi: +** fmov h0, h0 +** ret +*/ +v8qi +g_v8qi (v8qi x) +{ + return __builtin_shuffle ((v8qi){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, + (v8qi){ 8, 9, 0, 1, 2, 3, 4, 5 }); +} + +/* +** h_v16qi: +** fmov h0, h0 +** ret +*/ +v16qi +h_v16qi (v16qi x) +{ + return __builtin_shuffle ( + x, (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + (v16qi){ 0, 1, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-5-be.c b/gcc/testsuite/gcc.target/aarch64/fmov-5-be.c new file mode 100644 index 00000000000..0fcefa7c948 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-5-be.c @@ -0,0 +1,150 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbig-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef __fp16 v4hf __attribute__ ((vector_size (8))); +typedef __fp16 v8hf __attribute__ ((vector_size (16))); +typedef __bf16 v4bf __attribute__ ((vector_size (8))); +typedef __bf16 v8bf __attribute__ ((vector_size (16))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +f_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 6, 3 }); +} + +/* +** g_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +g_v4hf (v4hf x) +{ + return __builtin_shuffle ((v4hf){ 0, 0, 0, 0 }, x, (v4hi){ 0, 1, 2, 7 }); +} + +/* +** h_v4hf: +** fmov s0, s0 +** ret +*/ +v4hf +h_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 2, 3 }); +} + +/* +** f_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +f_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 8, 9, 10, 11, 12, 13, 14, 7 }); +} + +/* +** g_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +g_v8hf (v8hf x) +{ + return __builtin_shuffle ((v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 
}, x, + (v8hi){ 0, 1, 2, 3, 4, 5, 6, 15 }); +} + +/* +** h_v8hf: +** fmov s0, s0 +** ret +*/ +v8hf +h_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 8, 9, 10, 11, 12, 13, 6, 7 }); +} + +/* +** f_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +f_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 6, 3 }); +} + +/* +** g_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +g_v4bf (v4bf x) +{ + return __builtin_shuffle ((v4bf){ 0, 0, 0, 0 }, x, (v4hi){ 0, 1, 2, 7 }); +} + +/* +** h_v4bf: +** fmov s0, s0 +** ret +*/ +v4bf +h_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 4, 5, 2, 3 }); +} + +/* +** f_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +f_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 8, 9, 10, 11, 12, 13, 14, 7 }); +} + +/* +** g_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +g_v8bf (v8bf x) +{ + return __builtin_shuffle ((v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, + (v8hi){ 0, 1, 2, 3, 4, 5, 6, 15 }); +} + +/* +** h_v8bf: +** fmov s0, s0 +** ret +*/ +v8bf +h_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 8, 9, 10, 11, 12, 13, 6, 7 }); +} diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-5-le.c b/gcc/testsuite/gcc.target/aarch64/fmov-5-le.c new file mode 100644 index 00000000000..e3ad420483b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/fmov-5-le.c @@ -0,0 +1,150 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlittle-endian" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#pragma GCC target ("arch=armv8.2-a+fp16") + +typedef __fp16 v4hf __attribute__ ((vector_size (8))); +typedef __fp16 v8hf __attribute__ ((vector_size (16))); +typedef __bf16 v4bf __attribute__ ((vector_size (8))); +typedef __bf16 v8bf __attribute__ ((vector_size (16))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); + +/* +** f_v4hf: +** fmov 
h0, h0 +** ret +*/ +v4hf +f_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 0, 4, 5, 6 }); +} + +/* +** g_v4hf: +** fmov h0, h0 +** ret +*/ +v4hf +g_v4hf (v4hf x) +{ + return __builtin_shuffle ((v4hf){ 0, 0, 0, 0 }, x, (v4hi){ 4, 0, 1, 2 }); +} + +/* +** h_v4hf: +** fmov s0, s0 +** ret +*/ +v4hf +h_v4hf (v4hf x) +{ + return __builtin_shuffle (x, (v4hf){ 0, 0, 0, 0 }, (v4hi){ 0, 1, 4, 5 }); +} + +/* +** f_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +f_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 0, 8, 9, 10, 11, 12, 13, 14 }); +} + +/* +** g_v8hf: +** fmov h0, h0 +** ret +*/ +v8hf +g_v8hf (v8hf x) +{ + return __builtin_shuffle ((v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, + (v8hi){ 8, 0, 1, 2, 3, 4, 5, 6 }); +} + +/* +** h_v8hf: +** fmov s0, s0 +** ret +*/ +v8hf +h_v8hf (v8hf x) +{ + return __builtin_shuffle (x, (v8hf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} + +/* +** f_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +f_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 0, 4, 5, 6 }); +} + +/* +** g_v4bf: +** fmov h0, h0 +** ret +*/ +v4bf +g_v4bf (v4bf x) +{ + return __builtin_shuffle ((v4bf){ 0, 0, 0, 0 }, x, (v4hi){ 4, 0, 1, 2 }); +} + +/* +** h_v4bf: +** fmov s0, s0 +** ret +*/ +v4bf +h_v4bf (v4bf x) +{ + return __builtin_shuffle (x, (v4bf){ 0, 0, 0, 0 }, (v4hi){ 0, 1, 4, 5 }); +} + +/* +** f_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +f_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 0, 8, 9, 10, 11, 12, 13, 14 }); +} + +/* +** g_v8bf: +** fmov h0, h0 +** ret +*/ +v8bf +g_v8bf (v8bf x) +{ + return __builtin_shuffle ((v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, x, + (v8hi){ 8, 0, 1, 2, 3, 4, 5, 6 }); +} + +/* +** h_v8bf: +** fmov s0, s0 +** ret +*/ +v8bf +h_v8bf (v8bf x) +{ + return __builtin_shuffle (x, (v8bf){ 0, 0, 0, 0, 0, 0, 0, 0 }, + (v8hi){ 0, 1, 8, 9, 10, 11, 12, 13 }); +} -- 2.17.1