[gcc r16-2145] aarch64: Some fixes for SVE INDEX constants

Richard Sandiford via Gcc-cvs Wed, 09 Jul 2025 08:40:55 -0700

https://gcc.gnu.org/g:41c446389446a357172883389e36fd10c882ce6d


commit r16-2145-g41c446389446a357172883389e36fd10c882ce6d
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Wed Jul 9 16:39:20 2025 +0100

    aarch64: Some fixes for SVE INDEX constants
    
    When using SVE INDEX to load an Advanced SIMD vector, we need to
    take account of the different element ordering for big-endian
    targets.  For example, when big-endian targets store the V4SI
    constant { 0, 1, 2, 3 } in registers, 0 becomes the most
    significant element, whereas INDEX always operates from the
    least significant element.  A big-endian target would therefore
    load V4SI { 0, 1, 2, 3 } using:
    
        INDEX Z0.S, #3, #-1
    
    rather than little-endian's:
    
        INDEX Z0.S, #0, #1
    
    While there, I noticed that we would only check the first vector
    in a multi-vector SVE constant, which would trigger an ICE if the
    other vectors turned out to be invalid.  This is pretty difficult to
    trigger at the moment, since we only allow single-register modes to be
    used as frontend & middle-end vector modes, but it can be seen using
    the RTL frontend.
    
    gcc/
            * config/aarch64/aarch64.cc (aarch64_sve_index_series_p): New
            function, split out from...
            (aarch64_simd_valid_imm): ...here.  Account for the different
            SVE and Advanced SIMD element orders on big-endian targets.
            Check each vector in a structure mode.
    
    gcc/testsuite/
            * gcc.dg/rtl/aarch64/vec-series-1.c: New test.
            * gcc.dg/rtl/aarch64/vec-series-2.c: Likewise.
            * gcc.target/aarch64/sve/acle/general/dupq_2.c: Fix expected
            output for this big-endian test.
            * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
            * gcc.target/aarch64/sve/vec_init_3.c: Restrict to little-endian
            targets and add more tests.
            * gcc.target/aarch64/sve/vec_init_4.c: New big-endian version
            of vec_init_3.c.

Diff:
---
 gcc/config/aarch64/aarch64.cc                      |  59 +++++-
 gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c    |  35 ++++
 gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c    |  35 ++++
 .../gcc.target/aarch64/sve/acle/general/dupq_2.c   |   2 +-
 .../gcc.target/aarch64/sve/acle/general/dupq_4.c   |   2 +-
 gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c  | 114 ++++++++++-
 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c  | 209 +++++++++++++++++++++
 7 files changed, 446 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 7960b639f903..bc28f1c584d2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23074,6 +23074,58 @@ aarch64_sve_index_immediate_p (rtx base_or_step)
          && IN_RANGE (INTVAL (base_or_step), -16, 15));
 }
 
+/* Return true if SERIES is a constant vector that can be loaded using
+   an immediate SVE INDEX, considering both SVE and Advanced SIMD modes.
+   When returning true, store the base in *BASE_OUT and the step
+   in *STEP_OUT.  */
+
+static bool
+aarch64_sve_index_series_p (rtx series, rtx *base_out, rtx *step_out)
+{
+  rtx base, step;
+  if (!const_vec_series_p (series, &base, &step)
+      || !CONST_INT_P (base)
+      || !CONST_INT_P (step))
+    return false;
+
+  auto mode = GET_MODE (series);
+  auto elt_mode = as_a<scalar_int_mode> (GET_MODE_INNER (mode));
+  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+  if (BYTES_BIG_ENDIAN && (vec_flags & VEC_ADVSIMD))
+    {
+      /* On big-endian targets, architectural lane 0 holds the last element
+        for Advanced SIMD and the first element for SVE; see the comment at
+        the head of aarch64-sve.md for details.  This means that, from an SVE
+        point of view, an Advanced SIMD series goes from the last element to
+        the first.  */
+      auto i = GET_MODE_NUNITS (mode).to_constant () - 1;
+      base = gen_int_mode (UINTVAL (base) + i * UINTVAL (step), elt_mode);
+      step = gen_int_mode (-UINTVAL (step), elt_mode);
+    }
+
+  if (!aarch64_sve_index_immediate_p (base)
+      || !aarch64_sve_index_immediate_p (step))
+    return false;
+
+  /* If the mode spans multiple registers, check that each subseries is
+     in range.  */
+  unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
+  if (nvectors != 1)
+    {
+      unsigned int nunits;
+      if (!GET_MODE_NUNITS (mode).is_constant (&nunits))
+       return false;
+      nunits /= nvectors;
+      for (unsigned int i = 1; i < nvectors; ++i)
+       if (!IN_RANGE (INTVAL (base) + i * nunits * INTVAL (step), -16, 15))
+         return false;
+    }
+
+  *base_out = base;
+  *step_out = step;
+  return true;
+}
+
 /* Return true if X is a valid immediate for the SVE ADD and SUB instructions
    when applied to mode MODE.  Negate X first if NEGATE_P is true.  */
 
@@ -23522,13 +23574,8 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
     n_elts = CONST_VECTOR_NPATTERNS (op);
   else if (which == AARCH64_CHECK_MOV
           && TARGET_SVE
-          && const_vec_series_p (op, &base, &step))
+          && aarch64_sve_index_series_p (op, &base, &step))
     {
-      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
-      if (!aarch64_sve_index_immediate_p (base)
-         || !aarch64_sve_index_immediate_p (step))
-       return false;
-
       if (info)
        {
          /* Get the corresponding container mode.  E.g. an INDEX on V2SI
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c
new file mode 100644
index 000000000000..6f795c68ba45
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve"
+
+svint64x2_t __RTL (startwith ("vregs")) foo ()
+{
+  (function "foo"
+    (insn-chain
+      (block 2
+       (edge-from entry (flags "FALLTHRU"))
+       (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+        (cnote 2 NOTE_INSN_FUNCTION_BEG)
+       (insn 3 (set (reg:VNx4DI <0>)
+                    (const_vector:VNx4DI [(const_int 11)
+                                          (const_int 12)
+                                          (const_int 13)
+                                          (const_int 14)
+                                          (const_int 15)
+                                          (const_int 16)
+                                          (const_int 17)
+                                          (const_int 18)])))
+       (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
+        (insn 5 (use (reg:VNx4DI v0)))
+       (edge-to exit (flags "FALLTHRU"))
+      ) ;; block 2
+    ) ;; insn-chain
+    (crtl (return_rtx (reg:VNx4DI v0)))
+  ) ;; function
+}
+
+/* { dg-final { scan-assembler {\tindex\tz0\.d, #11, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz1\.d, #15, #1\n} } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c
new file mode 100644
index 000000000000..17e46cbc03c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c
@@ -0,0 +1,35 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve"
+
+svint64x2_t __RTL (startwith ("vregs")) foo ()
+{
+  (function "foo"
+    (insn-chain
+      (block 2
+       (edge-from entry (flags "FALLTHRU"))
+       (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+        (cnote 2 NOTE_INSN_FUNCTION_BEG)
+       (insn 3 (set (reg:VNx4DI <0>)
+                    (const_vector:VNx4DI [(const_int -16)
+                                          (const_int -15)
+                                          (const_int -14)
+                                          (const_int -13)
+                                          (const_int -12)
+                                          (const_int -11)
+                                          (const_int -10)
+                                          (const_int -9)])))
+       (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
+        (insn 5 (use (reg:VNx4DI v0)))
+       (edge-to exit (flags "FALLTHRU"))
+      ) ;; block 2
+    ) ;; insn-chain
+    (crtl (return_rtx (reg:VNx4DI v0)))
+  ) ;; function
+}
+
+/* { dg-final { scan-assembler {\tindex\tz0\.d, #-16, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz1\.d, #-12, #1\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
index 218a66013375..13ebb9fd6fee 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
@@ -10,6 +10,6 @@ dupq (int x)
   return svdupq_s32 (x, 1, 2, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
index cbee6f27b62f..13d27e2781d1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
@@ -10,6 +10,6 @@ dupq (int x)
   return svdupq_s32 (0, 1, x, 3);
 }
 
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
 /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
 /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
index 25910dbfa1fb..5100a87c0d93 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -mlittle-endian" } */
 /* { dg-final { check-function-bodies "**" "" "" } } */
 
 typedef char v16qi __attribute__ ((vector_size (16)));
@@ -8,7 +8,7 @@ typedef short v8hi __attribute__ ((vector_size (16)));
 typedef short v4hi __attribute__ ((vector_size (8)));
 typedef int v4si __attribute__ ((vector_size (16)));
 typedef int v2si __attribute__ ((vector_size (8)));
-typedef long v2di __attribute__ ((vector_size (16)));
+typedef long long v2di __attribute__ ((vector_size (16)));
 
 /*
 ** f_v16qi:
@@ -97,3 +97,113 @@ g_v4si (void)
 {
   return (v4si){ 3, -1, -5, -9 };
 }
+
+/*
+** g_min_1:
+**     index   z0\.s, #-16, #1
+**     ret
+*/
+v4si
+g_min_1 (void)
+{
+  return (v4si){ -16, -15, -14, -13 };
+}
+
+/*
+** g_min_min:
+**     index   z0\.s, #-16, #-16
+**     ret
+*/
+v4si
+g_min_min (void)
+{
+  return (v4si){ -16, -32, -48, -64 };
+}
+
+/*
+** g_min_max:
+**     index   z0\.s, #-16, #15
+**     ret
+*/
+v4si
+g_min_max (void)
+{
+  return (v4si){ -16, -1, 14, 29 };
+}
+
+/*
+** g_max_1:
+**     index   z0\.s, #15, #1
+**     ret
+*/
+v4si
+g_max_1 (void)
+{
+  return (v4si){ 15, 16, 17, 18 };
+}
+
+/*
+** g_max_min:
+**     index   z0\.s, #15, #-16
+**     ret
+*/
+v4si
+g_max_min (void)
+{
+  return (v4si){ 15, -1, -17, -33 };
+}
+
+/*
+** g_max_max:
+**     index   z0\.s, #15, #15
+**     ret
+*/
+v4si
+g_max_max (void)
+{
+  return (v4si){ 15, 30, 45, 60 };
+}
+
+/*
+** g_ob_1:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_1 (void)
+{
+  return (v4si){ -17, -16, -15, -14 };
+}
+
+/*
+** g_ob_2:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_2 (void)
+{
+  return (v4si){ 16, 17, 18, 19 };
+}
+
+/*
+** g_ob_3:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_3 (void)
+{
+  return (v4si){ 0, -17, -34, -51 };
+}
+
+/*
+** g_ob_4:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_4 (void)
+{
+  return (v4si){ 0, 16, 32, 48 };
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
new file mode 100644
index 000000000000..0681d9591010
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
@@ -0,0 +1,209 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+/*
+** f_v16qi:
+**     index   z0\.b, #15, #-1
+**     ret
+*/
+v16qi
+f_v16qi (void)
+{
+  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+}
+
+/*
+** f_v8qi:
+**     index   z0\.b, #7, #-1
+**     ret
+*/
+v8qi
+f_v8qi (void)
+{
+  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v8hi:
+**     index   z0\.h, #7, #-1
+**     ret
+*/
+v8hi
+f_v8hi (void)
+{
+  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
+}
+
+/*
+** f_v4hi:
+**     index   z0\.h, #3, #-1
+**     ret
+*/
+v4hi
+f_v4hi (void)
+{
+  return (v4hi){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v4si:
+**     index   z0\.s, #3, #-1
+**     ret
+*/
+v4si
+f_v4si (void)
+{
+  return (v4si){ 0, 1, 2, 3 };
+}
+
+/*
+** f_v2si:
+**     index   z0\.s, #1, #-1
+**     ret
+*/
+v2si
+f_v2si (void)
+{
+  return (v2si){ 0, 1 };
+}
+
+/*
+** f_v2di:
+**     index   z0\.d, #1, #-1
+**     ret
+*/
+v2di
+f_v2di (void)
+{
+  return (v2di){ 0, 1 };
+}
+
+/*
+** g_v4si:
+**     index   z0\.s, #-9, #4
+**     ret
+*/
+v4si
+g_v4si (void)
+{
+  return (v4si){ 3, -1, -5, -9 };
+}
+
+/*
+** g_min_1:
+**     index   z0\.s, #-16, #1
+**     ret
+*/
+v4si
+g_min_1 (void)
+{
+  return (v4si){ -13, -14, -15, -16 };
+}
+
+/*
+** g_min_min:
+**     index   z0\.s, #-16, #-16
+**     ret
+*/
+v4si
+g_min_min (void)
+{
+  return (v4si){ -64, -48, -32, -16 };
+}
+
+/*
+** g_min_max:
+**     index   z0\.s, #-16, #15
+**     ret
+*/
+v4si
+g_min_max (void)
+{
+  return (v4si){ 29, 14, -1, -16 };
+}
+
+/*
+** g_max_1:
+**     index   z0\.s, #15, #1
+**     ret
+*/
+v4si
+g_max_1 (void)
+{
+  return (v4si){ 18, 17, 16, 15 };
+}
+
+/*
+** g_max_min:
+**     index   z0\.s, #15, #-16
+**     ret
+*/
+v4si
+g_max_min (void)
+{
+  return (v4si){ -33, -17, -1, 15 };
+}
+
+/*
+** g_max_max:
+**     index   z0\.s, #15, #15
+**     ret
+*/
+v4si
+g_max_max (void)
+{
+  return (v4si){ 60, 45, 30, 15 };
+}
+
+/*
+** g_ob_1:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_1 (void)
+{
+  return (v4si){ -14, -15, -16, -17 };
+}
+
+/*
+** g_ob_2:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_2 (void)
+{
+  return (v4si){ 19, 18, 17, 16 };
+}
+
+/*
+** g_ob_3:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_3 (void)
+{
+  return (v4si){ -51, -34, -17, 0 };
+}
+
+/*
+** g_ob_4:
+**     ((?!index).)*
+**     ret
+*/
+v4si
+g_ob_4 (void)
+{
+  return (v4si){ 48, 32, 16, 0 };
+}

[gcc r16-2145] aarch64: Some fixes for SVE INDEX constants

Reply via email to