https://gcc.gnu.org/g:a92f54f580c37732a5de01e47aed56882231f196

commit r15-3669-ga92f54f580c37732a5de01e47aed56882231f196
Author: Pengxuan Zheng <quic_pzh...@quicinc.com>
Date:   Tue Sep 10 17:59:46 2024 -0700

    aarch64: Improve vector constant generation using SVE INDEX instruction [PR113328]
    
    SVE's INDEX instruction can be used to populate vectors by values starting from
    "base" and incremented by "step" for each subsequent value. We can take
    advantage of it to generate vector constants if TARGET_SVE is available and the
    base and step values are within [-16, 15].
    
    For example, with the following function:
    
    typedef int v4si __attribute__ ((vector_size (16)));
    v4si
    f_v4si (void)
    {
      return (v4si){ 0, 1, 2, 3 };
    }
    
    GCC currently generates:
    
    f_v4si:
            adrp    x0, .LC4
            ldr     q0, [x0, #:lo12:.LC4]
            ret
    
    .LC4:
            .word   0
            .word   1
            .word   2
            .word   3
    
    With this patch, we generate an INDEX instruction instead if TARGET_SVE is
    available.
    
    f_v4si:
            index   z0.s, #0, #1
            ret
    
            PR target/113328
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.cc (aarch64_simd_valid_immediate): Improve
            handling of some ADVSIMD vectors by using SVE's INDEX if TARGET_SVE is
            available.
            (aarch64_output_simd_mov_immediate): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
            SVE's INDEX instruction.
            * gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
            * gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
            * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
            * gcc.target/aarch64/sve/vec_init_3.c: New test.
    
    Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com>

Diff:
---
 gcc/config/aarch64/aarch64.cc                      | 13 ++-
 .../gcc.target/aarch64/sve/acle/general/dupq_1.c   |  3 +-
 .../gcc.target/aarch64/sve/acle/general/dupq_2.c   |  3 +-
 .../gcc.target/aarch64/sve/acle/general/dupq_3.c   |  3 +-
 .../gcc.target/aarch64/sve/acle/general/dupq_4.c   |  3 +-
 gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c  | 99 ++++++++++++++++++++++
 6 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 6ccf08d1cc0a..92763d403c75 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22987,7 +22987,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   if (CONST_VECTOR_P (op)
       && CONST_VECTOR_DUPLICATE_P (op))
     n_elts = CONST_VECTOR_NPATTERNS (op);
-  else if ((vec_flags & VEC_SVE_DATA)
+  else if (which == AARCH64_CHECK_MOV
+          && TARGET_SVE
           && const_vec_series_p (op, &base, &step))
     {
       gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
@@ -25245,6 +25246,16 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
 
   if (which == AARCH64_CHECK_MOV)
     {
+      if (info.insn == simd_immediate_info::INDEX)
+       {
+         gcc_assert (TARGET_SVE);
+         snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
+                   HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
+                   element_char, INTVAL (info.u.index.base),
+                   INTVAL (info.u.index.step));
+         return templ;
+       }
+
       mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
       shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
                  ? "msl" : "lsl");
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
index 216699b0536e..0940bedd0ddb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
index d494943a2753..218a66013375 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
index 4bc8259df073..245d43b75b54 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
index 6f9f9f2f22f5..cbee6f27b62f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -10,7 +10,6 @@ dupq (int x)
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
-/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
new file mode 100644
index 000000000000..25910dbfa1fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
@@ -0,0 +1,99 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+
+/*
+** f_v16qi:
+**     index   z0\.b, #0, #1
+**     ret
+*/
+v16qi
+f_v16qi (void)
+{
+  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+}
+
+/*
+** f_v8qi:
+**     index   z0\.b, #0, #1
+**     ret
+*/
+v8qi
+f_v8qi (void)
+{
+  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v8hi:
+**     index   z0\.h, #0, #1
+**     ret
+*/
+v8hi
+f_v8hi (void)
+{
+  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v4hi:
+**     index   z0\.h, #0, #1
+**     ret
+*/
+v4hi
+f_v4hi (void)
+{
+  return (v4hi){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v4si:
+**     index   z0\.s, #0, #1
+**     ret
+*/
+v4si
+f_v4si (void)
+{
+  return (v4si){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v2si:
+**     index   z0\.s, #0, #1
+**     ret
+*/
+v2si
+f_v2si (void)
+{
+  return (v2si){ 0, 1 };
+}
+
+/*
+** f_v2di:
+**     index   z0\.d, #0, #1
+**     ret
+*/
+v2di
+f_v2di (void)
+{
+  return (v2di){ 0, 1 };
+}
+
+/*
+** g_v4si:
+**     index   z0\.s, #3, #-4
+**     ret
+*/
+v4si
+g_v4si (void)
+{
+  return (v4si){ 3, -1, -5, -9 };
+}

Reply via email to