We can still use SVE's INDEX instruction to construct vectors even if not all
elements are constants. For example, { 0, x, 2, 3 } can be constructed by first
using "INDEX #0, #1" to generate { 0, 1, 2, 3 }, and then setting the
non-constant elements separately.

        PR target/113328

gcc/ChangeLog:

        * config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback):
        Improve part-variable vector generation with SVE's INDEX if TARGET_SVE
        is available.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
        check-function-bodies.
        * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
        * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
        * gcc.target/aarch64/sve/vec_init_4.c: New test.
        * gcc.target/aarch64/sve/vec_init_5.c: New test.

Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com>
---
 gcc/config/aarch64/aarch64.cc                 | 81 ++++++++++++++++++-
 .../aarch64/sve/acle/general/dupq_1.c         | 18 ++++-
 .../aarch64/sve/acle/general/dupq_2.c         | 18 ++++-
 .../aarch64/sve/acle/general/dupq_3.c         | 18 ++++-
 .../aarch64/sve/acle/general/dupq_4.c         | 18 ++++-
 .../gcc.target/aarch64/sve/vec_init_4.c       | 47 +++++++++++
 .../gcc.target/aarch64/sve/vec_init_5.c       | 12 +++
 7 files changed, 199 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6b3ca57d0eb..7305a5c6375 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23942,12 +23942,91 @@ aarch64_expand_vector_init_fallback (rtx target, rtx vals)
   if (n_var != n_elts)
     {
       rtx copy = copy_rtx (vals);
+      bool is_index_seq = false;
+
+      /* If at least half of the elements of the vector are constants and all
+        these constant elements form a linear sequence of the form { B, B + S,
+        B + 2 * S, B + 3 * S, ... }, we can generate the vector with SVE's
+        INDEX instruction if SVE is available and then set the elements which
+        are not constant separately.  More precisely, each constant element I
+        has to be B + I * S where B and S must be valid immediate operands for
+        an SVE INDEX instruction.
+
+        For example, { X, 1, 2, 3 } is a vector satisfying these conditions and
+        we can generate a vector of all constants (i.e., { 0, 1, 2, 3 }) first
+        and then set the first element of the vector to X.  */
+
+      if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+         && n_var <= n_elts / 2)
+       {
+         int const_idx = -1;
+         HOST_WIDE_INT const_val = 0;
+         int base = 16;
+         int step = 16;
+
+         for (int i = 0; i < n_elts; ++i)
+           {
+             rtx x = XVECEXP (vals, 0, i);
+
+             if (!CONST_INT_P (x))
+               continue;
+
+             if (const_idx == -1)
+               {
+                 const_idx = i;
+                 const_val = INTVAL (x);
+               }
+             else
+               {
+                 if ((INTVAL (x) - const_val) % (i - const_idx) == 0)
+                   {
+                     HOST_WIDE_INT s
+                         = (INTVAL (x) - const_val) / (i - const_idx);
+                     if (s >= -16 && s <= 15)
+                       {
+                         int b = const_val - s * const_idx;
+                         if (b >= -16 && b <= 15)
+                           {
+                             base = b;
+                             step = s;
+                           }
+                       }
+                   }
+                 break;
+               }
+           }
+
+         if (base != 16
+             && (!CONST_INT_P (v0)
+                 || (CONST_INT_P (v0) && INTVAL (v0) == base)))
+           {
+             if (!CONST_INT_P (v0))
+               XVECEXP (copy, 0, 0) = GEN_INT (base);
+
+             is_index_seq = true;
+             for (int i = 1; i < n_elts; ++i)
+               {
+                 rtx x = XVECEXP (copy, 0, i);
+
+                 if (CONST_INT_P (x))
+                   {
+                     if (INTVAL (x) != base + i * step)
+                       {
+                         is_index_seq = false;
+                         break;
+                       }
+                   }
+                 else
+                   XVECEXP (copy, 0, i) = GEN_INT (base + i * step);
+               }
+           }
+       }
 
       /* Load constant part of vector.  We really don't care what goes into the
         parts we will overwrite, but we're more likely to be able to load the
         constant efficiently if it has fewer, larger, repeating parts
         (see aarch64_simd_valid_immediate).  */
-      for (int i = 0; i < n_elts; i++)
+      for (int i = 0; !is_index_seq && i < n_elts; i++)
        {
          rtx x = XVECEXP (vals, 0, i);
          if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
index 0940bedd0dd..80eb1efdc66 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
@@ -1,15 +1,27 @@
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
 /* { dg-require-effective-target aarch64_little_endian } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <arm_sve.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**     index   z0\.s, #0, #1
+**     ins     v0\.s\[0\], w0
+**     dup     z0\.q, z0\.q\[0\]
+**     ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
index 218a6601337..afcad0a691e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -1,15 +1,27 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 /* To avoid needing big-endian header files.  */
 #pragma GCC aarch64 "arm_sve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**     index   z0\.s, #3, #-1
+**     ins     v0\.s\[0\], w0
+**     dup     z0\.q, z0\.q\[0\]
+**     ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
index 245d43b75b5..f912f4b905c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
@@ -1,15 +1,27 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 /* To avoid needing big-endian header files.  */
 #pragma GCC aarch64 "arm_sve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**     index   z0\.s, #0, #1
+**     ins     v0\.s\[2\], w0
+**     dup     z0\.q, z0\.q\[0\]
+**     ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
index cbee6f27b62..0cfdb23101b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -1,15 +1,27 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
 
 /* To avoid needing big-endian header files.  */
 #pragma GCC aarch64 "arm_sve.h"
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dupq:
+**     index   z0\.s, #3, #-1
+**     ins     v0\.s\[2\], w0
+**     dup     z0\.q, z0\.q\[0\]
+**     ret
+*/
 svint32_t
 dupq (int x)
 {
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
+#ifdef __cplusplus
+}
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
new file mode 100644
index 00000000000..898168dc8ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef long v2di __attribute__((vector_size(16)));
+
+/*
+** f:
+**     index   z0\.s, #0, #1
+**     ins     v0\.s\[1\], w0
+**     ret
+*/
+v4si
+f (int x)
+{
+  return (v4si){ 0, x, 2, 3 };
+}
+
+/*
+** f1:
+**     index   z0\.s, #3, #-4
+**     ins     v0\.s\[1\], w0
+**     ins     v0\.s\[2\], w1
+**     ret
+*/
+v4si
+f1 (int x, int y)
+{
+  return (v4si){ 3, x, y, -9 };
+}
+
+/*
+** f2:
+**     index   z0\.h, #4, #2
+**     ins     v0\.h\[0\], w0
+**     ins     v0\.h\[3\], w1
+**     ins     v0\.h\[7\], w2
+**     ret
+*/
+v8hi
+f2 (short x, short y, short z)
+{
+  return (v8hi){ x, 6, 8, y, 12, 14, 16, z };
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
new file mode 100644
index 00000000000..e4a71736f5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si
+f (int x, int y)
+{
+  return (v4si){ 1, x, y, 3 };
+}
+
+/* { dg-final { scan-assembler-not {index} } } */
-- 
2.17.1

Reply via email to