On Wed, Jul 9, 2025 at 7:09 AM Richard Sandiford
<richard.sandif...@arm.com> wrote:
>
> When using SVE INDEX to load an Advanced SIMD vector, we need to
> take account of the different element ordering for big-endian
> targets.  For example, when big-endian targets store the V4SI
> constant { 0, 1, 2, 3 } in registers, 0 becomes the most
> significant element, whereas INDEX always operates from the
> least significant element.  A big-endian target would therefore
> load V4SI { 0, 1, 2, 3 } using:
>
>     INDEX Z0.S, #3, #-1
>
> rather than little-endian's:
>
>     INDEX Z0.S, #0, #1
>
> While there, I noticed that we would only check the first vector
> in a multi-vector SVE constant, which would trigger an ICE if the
> other vectors turned out to be invalid.  This is pretty difficult to
> trigger at the moment, since we only allow single-register modes to be
> used as frontend & middle-end vector modes, but it can be seen using
> the RTL frontend.
>
> Tested on aarch64-linux-gnu and aarch64_be-elf.  OK to install?

When I was reviewing the original index patch internally, I was worried
about this, but it looks like I didn't do a thorough enough job of it.
Anyway, this is OK.

Thanks,
Andrew

>
> Richard
>
>
> gcc/
>         * config/aarch64/aarch64.cc (aarch64_sve_index_series_p): New
>         function, split out from...
>         (aarch64_simd_valid_imm): ...here.  Account for the different
>         SVE and Advanced SIMD element orders on big-endian targets.
>         Check each vector in a structure mode.
>
> gcc/testsuite/
>         * gcc.dg/rtl/aarch64/vec-series-1.c: New test.
>         * gcc.dg/rtl/aarch64/vec-series-2.c: Likewise.
>         * gcc.target/aarch64/sve/acle/general/dupq_2.c: Fix expected
>         output for this big-endian test.
>         * gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
>         * gcc.target/aarch64/sve/vec_init_3.c: Restrict to little-endian
>         targets and add more tests.
>         * gcc.target/aarch64/sve/vec_init_4.c: New big-endian version
>         of vec_init_3.c.
> ---
>  gcc/config/aarch64/aarch64.cc                 |  59 ++++-
>  .../gcc.dg/rtl/aarch64/vec-series-1.c         |  35 +++
>  .../gcc.dg/rtl/aarch64/vec-series-2.c         |  35 +++
>  .../aarch64/sve/acle/general/dupq_2.c         |   2 +-
>  .../aarch64/sve/acle/general/dupq_4.c         |   2 +-
>  .../gcc.target/aarch64/sve/vec_init_3.c       | 114 +++++++++-
>  .../gcc.target/aarch64/sve/vec_init_4.c       | 209 ++++++++++++++++++
>  7 files changed, 446 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index ce25f4f6f9f..6d5b2009b2a 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23074,6 +23074,58 @@ aarch64_sve_index_immediate_p (rtx base_or_step)
>           && IN_RANGE (INTVAL (base_or_step), -16, 15));
>  }
>
> +/* Return true if SERIES is a constant vector that can be loaded using
> +   an immediate SVE INDEX, considering both SVE and Advanced SIMD modes.
> +   When returning true, store the base in *BASE_OUT and the step
> +   in *STEP_OUT.  */
> +
> +static bool
> +aarch64_sve_index_series_p (rtx series, rtx *base_out, rtx *step_out)
> +{
> +  rtx base, step;
> +  if (!const_vec_series_p (series, &base, &step)
> +      || !CONST_INT_P (base)
> +      || !CONST_INT_P (step))
> +    return false;
> +
> +  auto mode = GET_MODE (series);
> +  auto elt_mode = as_a<scalar_int_mode> (GET_MODE_INNER (mode));
> +  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
> +  if (BYTES_BIG_ENDIAN && (vec_flags & VEC_ADVSIMD))
> +    {
> +      /* On big-endian targets, architectural lane 0 holds the last element
> +        for Advanced SIMD and the first element for SVE; see the comment at
> +        the head of aarch64-sve.md for details.  This means that, from an SVE
> +        point of view, an Advanced SIMD series goes from the last element to
> +        the first.  */
> +      auto i = GET_MODE_NUNITS (mode).to_constant () - 1;
> +      base = gen_int_mode (UINTVAL (base) + i * UINTVAL (step), elt_mode);
> +      step = gen_int_mode (-UINTVAL (step), elt_mode);
> +    }
> +
> +  if (!aarch64_sve_index_immediate_p (base)
> +      || !aarch64_sve_index_immediate_p (step))
> +    return false;
> +
> +  /* If the mode spans multiple registers, check that each subseries is
> +     in range.  */
> +  unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
> +  if (nvectors != 1)
> +    {
> +      unsigned int nunits;
> +      if (!GET_MODE_NUNITS (mode).is_constant (&nunits))
> +       return false;
> +      nunits /= nvectors;
> +      for (unsigned int i = 1; i < nvectors; ++i)
> +       if (!IN_RANGE (INTVAL (base) + i * nunits * INTVAL (step), -16, 15))
> +         return false;
> +    }
> +
> +  *base_out = base;
> +  *step_out = step;
> +  return true;
> +}
> +
>  /* Return true if X is a valid immediate for the SVE ADD and SUB instructions
>     when applied to mode MODE.  Negate X first if NEGATE_P is true.  */
>
> @@ -23522,13 +23574,8 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info 
> *info,
>      n_elts = CONST_VECTOR_NPATTERNS (op);
>    else if (which == AARCH64_CHECK_MOV
>            && TARGET_SVE
> -          && const_vec_series_p (op, &base, &step))
> +          && aarch64_sve_index_series_p (op, &base, &step))
>      {
> -      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
> -      if (!aarch64_sve_index_immediate_p (base)
> -         || !aarch64_sve_index_immediate_p (step))
> -       return false;
> -
>        if (info)
>         {
>           /* Get the corresponding container mode.  E.g. an INDEX on V2SI
> diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c 
> b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c
> new file mode 100644
> index 00000000000..6f795c68ba4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-1.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile { target aarch64*-*-* } } */
> +/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve"
> +
> +svint64x2_t __RTL (startwith ("vregs")) foo ()
> +{
> +  (function "foo"
> +    (insn-chain
> +      (block 2
> +       (edge-from entry (flags "FALLTHRU"))
> +       (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +        (cnote 2 NOTE_INSN_FUNCTION_BEG)
> +       (insn 3 (set (reg:VNx4DI <0>)
> +                    (const_vector:VNx4DI [(const_int 11)
> +                                          (const_int 12)
> +                                          (const_int 13)
> +                                          (const_int 14)
> +                                          (const_int 15)
> +                                          (const_int 16)
> +                                          (const_int 17)
> +                                          (const_int 18)])))
> +       (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
> +        (insn 5 (use (reg:VNx4DI v0)))
> +       (edge-to exit (flags "FALLTHRU"))
> +      ) ;; block 2
> +    ) ;; insn-chain
> +    (crtl (return_rtx (reg:VNx4DI v0)))
> +  ) ;; function
> +}
> +
> +/* { dg-final { scan-assembler {\tindex\tz0\.d, #11, #1\n} } } */
> +/* { dg-final { scan-assembler {\tindex\tz1\.d, #15, #1\n} } } */
> diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c 
> b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c
> new file mode 100644
> index 00000000000..17e46cbc03c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/rtl/aarch64/vec-series-2.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile { target aarch64*-*-* } } */
> +/* { dg-options "-O2 -msve-vector-bits=256 -mlittle-endian" } */
> +
> +#include <arm_sve.h>
> +
> +#pragma GCC target "+sve"
> +
> +svint64x2_t __RTL (startwith ("vregs")) foo ()
> +{
> +  (function "foo"
> +    (insn-chain
> +      (block 2
> +       (edge-from entry (flags "FALLTHRU"))
> +       (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
> +        (cnote 2 NOTE_INSN_FUNCTION_BEG)
> +       (insn 3 (set (reg:VNx4DI <0>)
> +                    (const_vector:VNx4DI [(const_int -16)
> +                                          (const_int -15)
> +                                          (const_int -14)
> +                                          (const_int -13)
> +                                          (const_int -12)
> +                                          (const_int -11)
> +                                          (const_int -10)
> +                                          (const_int -9)])))
> +       (insn 4 (set (reg:VNx4DI v0) (reg:VNx4DI <0>)))
> +        (insn 5 (use (reg:VNx4DI v0)))
> +       (edge-to exit (flags "FALLTHRU"))
> +      ) ;; block 2
> +    ) ;; insn-chain
> +    (crtl (return_rtx (reg:VNx4DI v0)))
> +  ) ;; function
> +}
> +
> +/* { dg-final { scan-assembler {\tindex\tz0\.d, #-16, #1\n} } } */
> +/* { dg-final { scan-assembler {\tindex\tz1\.d, #-12, #1\n} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> index 218a6601337..13ebb9fd6fe 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c
> @@ -10,6 +10,6 @@ dupq (int x)
>    return svdupq_s32 (x, 1, 2, 3);
>  }
>
> -/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
> +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
>  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
>  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> index cbee6f27b62..13d27e2781d 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c
> @@ -10,6 +10,6 @@ dupq (int x)
>    return svdupq_s32 (0, 1, x, 3);
>  }
>
> -/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
> +/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1\n} } } */
>  /* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
>  /* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> index 25910dbfa1f..5100a87c0d9 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_3.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2" } */
> +/* { dg-options "-O2 -mlittle-endian" } */
>  /* { dg-final { check-function-bodies "**" "" "" } } */
>
>  typedef char v16qi __attribute__ ((vector_size (16)));
> @@ -8,7 +8,7 @@ typedef short v8hi __attribute__ ((vector_size (16)));
>  typedef short v4hi __attribute__ ((vector_size (8)));
>  typedef int v4si __attribute__ ((vector_size (16)));
>  typedef int v2si __attribute__ ((vector_size (8)));
> -typedef long v2di __attribute__ ((vector_size (16)));
> +typedef long long v2di __attribute__ ((vector_size (16)));
>
>  /*
>  ** f_v16qi:
> @@ -97,3 +97,113 @@ g_v4si (void)
>  {
>    return (v4si){ 3, -1, -5, -9 };
>  }
> +
> +/*
> +** g_min_1:
> +**     index   z0\.s, #-16, #1
> +**     ret
> +*/
> +v4si
> +g_min_1 (void)
> +{
> +  return (v4si){ -16, -15, -14, -13 };
> +}
> +
> +/*
> +** g_min_min:
> +**     index   z0\.s, #-16, #-16
> +**     ret
> +*/
> +v4si
> +g_min_min (void)
> +{
> +  return (v4si){ -16, -32, -48, -64 };
> +}
> +
> +/*
> +** g_min_max:
> +**     index   z0\.s, #-16, #15
> +**     ret
> +*/
> +v4si
> +g_min_max (void)
> +{
> +  return (v4si){ -16, -1, 14, 29 };
> +}
> +
> +/*
> +** g_max_1:
> +**     index   z0\.s, #15, #1
> +**     ret
> +*/
> +v4si
> +g_max_1 (void)
> +{
> +  return (v4si){ 15, 16, 17, 18 };
> +}
> +
> +/*
> +** g_max_min:
> +**     index   z0\.s, #15, #-16
> +**     ret
> +*/
> +v4si
> +g_max_min (void)
> +{
> +  return (v4si){ 15, -1, -17, -33 };
> +}
> +
> +/*
> +** g_max_max:
> +**     index   z0\.s, #15, #15
> +**     ret
> +*/
> +v4si
> +g_max_max (void)
> +{
> +  return (v4si){ 15, 30, 45, 60 };
> +}
> +
> +/*
> +** g_ob_1:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_1 (void)
> +{
> +  return (v4si){ -17, -16, -15, -14 };
> +}
> +
> +/*
> +** g_ob_2:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_2 (void)
> +{
> +  return (v4si){ 16, 17, 18, 19 };
> +}
> +
> +/*
> +** g_ob_3:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_3 (void)
> +{
> +  return (v4si){ 0, -17, -34, -51 };
> +}
> +
> +/*
> +** g_ob_4:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_4 (void)
> +{
> +  return (v4si){ 0, 16, 32, 48 };
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
> new file mode 100644
> index 00000000000..0681d959101
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
> @@ -0,0 +1,209 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbig-endian" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +typedef char v16qi __attribute__ ((vector_size (16)));
> +typedef char v8qi __attribute__ ((vector_size (8)));
> +typedef short v8hi __attribute__ ((vector_size (16)));
> +typedef short v4hi __attribute__ ((vector_size (8)));
> +typedef int v4si __attribute__ ((vector_size (16)));
> +typedef int v2si __attribute__ ((vector_size (8)));
> +typedef long long v2di __attribute__ ((vector_size (16)));
> +
> +/*
> +** f_v16qi:
> +**     index   z0\.b, #15, #-1
> +**     ret
> +*/
> +v16qi
> +f_v16qi (void)
> +{
> +  return (v16qi){ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
> +}
> +
> +/*
> +** f_v8qi:
> +**     index   z0\.b, #7, #-1
> +**     ret
> +*/
> +v8qi
> +f_v8qi (void)
> +{
> +  return (v8qi){ 0, 1, 2, 3, 4, 5, 6, 7 };
> +}
> +
> +/*
> +** f_v8hi:
> +**     index   z0\.h, #7, #-1
> +**     ret
> +*/
> +v8hi
> +f_v8hi (void)
> +{
> +  return (v8hi){ 0, 1, 2, 3, 4, 5, 6, 7 };
> +}
> +
> +/*
> +** f_v4hi:
> +**     index   z0\.h, #3, #-1
> +**     ret
> +*/
> +v4hi
> +f_v4hi (void)
> +{
> +  return (v4hi){ 0, 1, 2, 3 };
> +}
> +
> +/*
> +** f_v4si:
> +**     index   z0\.s, #3, #-1
> +**     ret
> +*/
> +v4si
> +f_v4si (void)
> +{
> +  return (v4si){ 0, 1, 2, 3 };
> +}
> +
> +/*
> +** f_v2si:
> +**     index   z0\.s, #1, #-1
> +**     ret
> +*/
> +v2si
> +f_v2si (void)
> +{
> +  return (v2si){ 0, 1 };
> +}
> +
> +/*
> +** f_v2di:
> +**     index   z0\.d, #1, #-1
> +**     ret
> +*/
> +v2di
> +f_v2di (void)
> +{
> +  return (v2di){ 0, 1 };
> +}
> +
> +/*
> +** g_v4si:
> +**     index   z0\.s, #-9, #4
> +**     ret
> +*/
> +v4si
> +g_v4si (void)
> +{
> +  return (v4si){ 3, -1, -5, -9 };
> +}
> +
> +/*
> +** g_min_1:
> +**     index   z0\.s, #-16, #1
> +**     ret
> +*/
> +v4si
> +g_min_1 (void)
> +{
> +  return (v4si){ -13, -14, -15, -16 };
> +}
> +
> +/*
> +** g_min_min:
> +**     index   z0\.s, #-16, #-16
> +**     ret
> +*/
> +v4si
> +g_min_min (void)
> +{
> +  return (v4si){ -64, -48, -32, -16 };
> +}
> +
> +/*
> +** g_min_max:
> +**     index   z0\.s, #-16, #15
> +**     ret
> +*/
> +v4si
> +g_min_max (void)
> +{
> +  return (v4si){ 29, 14, -1, -16 };
> +}
> +
> +/*
> +** g_max_1:
> +**     index   z0\.s, #15, #1
> +**     ret
> +*/
> +v4si
> +g_max_1 (void)
> +{
> +  return (v4si){ 18, 17, 16, 15 };
> +}
> +
> +/*
> +** g_max_min:
> +**     index   z0\.s, #15, #-16
> +**     ret
> +*/
> +v4si
> +g_max_min (void)
> +{
> +  return (v4si){ -33, -17, -1, 15 };
> +}
> +
> +/*
> +** g_max_max:
> +**     index   z0\.s, #15, #15
> +**     ret
> +*/
> +v4si
> +g_max_max (void)
> +{
> +  return (v4si){ 60, 45, 30, 15 };
> +}
> +
> +/*
> +** g_ob_1:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_1 (void)
> +{
> +  return (v4si){ -14, -15, -16, -17 };
> +}
> +
> +/*
> +** g_ob_2:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_2 (void)
> +{
> +  return (v4si){ 19, 18, 17, 16 };
> +}
> +
> +/*
> +** g_ob_3:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_3 (void)
> +{
> +  return (v4si){ -51, -34, -17, 0 };
> +}
> +
> +/*
> +** g_ob_4:
> +**     ((?!index).)*
> +**     ret
> +*/
> +v4si
> +g_ob_4 (void)
> +{
> +  return (v4si){ 48, 32, 16, 0 };
> +}
> --
> 2.43.0
>

Reply via email to