https://gcc.gnu.org/g:1048ebbbdc98a5928a974356d7f4244603b6bd32

commit r15-4110-g1048ebbbdc98a5928a974356d7f4244603b6bd32
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Mon Oct 7 13:03:02 2024 +0100

    aarch64: Handle SVE modes in aarch64_evpc_reencode [PR116583]
    
    For Advanced SIMD modes, aarch64_evpc_reencode tests whether
    a permute in a narrow element mode can be done more cheaply
    in a wider mode.  For example, { 0, 1, 8, 9, 4, 5, 12, 13 }
    on V8HI is a natural TRN1 on V4SI ({ 0, 4, 2, 6 }).
    
    This patch extends the code to handle SVE data and predicate
    modes as well.  This is a prerequisite to getting good results
    for PR116583.
    
    gcc/
            PR target/116583
            * config/aarch64/aarch64.cc (aarch64_coalesce_units): New function,
            extending the Advanced SIMD handling from...
            (aarch64_evpc_reencode): ...here to SVE data and predicate modes.
    
    gcc/testsuite/
            PR target/116583
            * gcc.target/aarch64/sve/permute_1.c: New test.
            * gcc.target/aarch64/sve/permute_2.c: Likewise.
            * gcc.target/aarch64/sve/permute_3.c: Likewise.
            * gcc.target/aarch64/sve/permute_4.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64.cc                    |  55 ++++-
 gcc/testsuite/gcc.target/aarch64/sve/permute_1.c | 106 +++++++++
 gcc/testsuite/gcc.target/aarch64/sve/permute_2.c | 277 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/permute_3.c |  91 ++++++++
 gcc/testsuite/gcc.target/aarch64/sve/permute_4.c | 113 +++++++++
 5 files changed, 633 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e7bb3278a27e..102680a0efca 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1933,6 +1933,46 @@ aarch64_sve_int_mode (machine_mode mode)
   return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
 }
 
+/* Look for a vector mode with the same classification as VEC_MODE,
+   but with each group of FACTOR elements coalesced into a single element.
+   In other words, look for a mode in which the elements are FACTOR times
+   larger and in which the number of elements is FACTOR times smaller.
+
+   Return the mode found, if one exists.  */
+
+static opt_machine_mode
+aarch64_coalesce_units (machine_mode vec_mode, unsigned int factor)
+{
+  auto elt_bits = vector_element_size (GET_MODE_BITSIZE (vec_mode),
+                                      GET_MODE_NUNITS (vec_mode));
+  auto vec_flags = aarch64_classify_vector_mode (vec_mode);
+  if (vec_flags & VEC_SVE_PRED)
+    {
+      if (known_eq (GET_MODE_SIZE (vec_mode), BYTES_PER_SVE_PRED))
+       return aarch64_sve_pred_mode (elt_bits * factor);
+      return {};
+    }
+
+  scalar_mode new_elt_mode;
+  if (!int_mode_for_size (elt_bits * factor, false).exists (&new_elt_mode))
+    return {};
+
+  if (vec_flags == VEC_ADVSIMD)
+    {
+      auto mode = aarch64_simd_container_mode (new_elt_mode,
+                                              GET_MODE_BITSIZE (vec_mode));
+      if (mode != word_mode)
+       return mode;
+    }
+  else if (vec_flags & VEC_SVE_DATA)
+    {
+      poly_uint64 new_nunits;
+      if (multiple_p (GET_MODE_NUNITS (vec_mode), factor, &new_nunits))
+       return aarch64_sve_data_mode (new_elt_mode, new_nunits);
+    }
+  return {};
+}
+
 /* Implement TARGET_VECTORIZE_RELATED_MODE.  */
 
 static opt_machine_mode
@@ -25731,26 +25771,23 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
 {
   expand_vec_perm_d newd;
 
-  if (d->vec_flags != VEC_ADVSIMD)
+  /* The subregs that we'd create are not supported for big-endian SVE;
+     see aarch64_modes_compatible_p for details.  */
+  if (BYTES_BIG_ENDIAN && (d->vec_flags & VEC_ANY_SVE))
     return false;
 
   /* Get the new mode.  Always twice the size of the inner
      and half the elements.  */
-  poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
-  unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
-  auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
-  machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
-
-  if (new_mode == word_mode)
+  machine_mode new_mode;
+  if (!aarch64_coalesce_units (d->vmode, 2).exists (&new_mode))
     return false;
 
   vec_perm_indices newpermindices;
-
   if (!newpermindices.new_shrunk_vector (d->perm, 2))
     return false;
 
   newd.vmode = new_mode;
-  newd.vec_flags = VEC_ADVSIMD;
+  newd.vec_flags = d->vec_flags;
   newd.op_mode = newd.vmode;
   newd.op_vec_flags = newd.vec_flags;
   newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL;
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/permute_1.c b/gcc/testsuite/gcc.target/aarch64/sve/permute_1.c
new file mode 100644
index 000000000000..90aeef321882
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/permute_1.c
@@ -0,0 +1,106 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVInt32_t vint32 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVFloat32_t vfloat32 __attribute__((arm_sve_vector_bits(256)));
+
+#define TESTS(TYPE)                                                    \
+  TYPE                                                                 \
+  TYPE##_zip1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 8, 9, 2, 3, 10, 11);   \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_zip2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 4, 5, 12, 13, 6, 7, 14, 15); \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 8, 9, 4, 5, 12, 13);   \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 2, 3, 10, 11, 6, 7, 14, 15); \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 4, 5, 8, 9, 12, 13);   \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 2, 3, 6, 7, 10, 11, 14, 15); \
+  }
+
+/*
+** vint32_zip1_d:
+**     zip1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint32_zip2_d:
+**     zip2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint32_trn1_d:
+**     trn1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint32_trn2_d:
+**     trn2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint32_uzp1_d:
+**     uzp1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint32_uzp2_d:
+**     uzp2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+TESTS (vint32)
+
+/*
+** vfloat32_zip1_d:
+**     zip1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat32_zip2_d:
+**     zip2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat32_trn1_d:
+**     trn1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat32_trn2_d:
+**     trn2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat32_uzp1_d:
+**     uzp1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat32_uzp2_d:
+**     uzp2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+TESTS (vfloat32)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/permute_2.c b/gcc/testsuite/gcc.target/aarch64/sve/permute_2.c
new file mode 100644
index 000000000000..085e05e0f7f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/permute_2.c
@@ -0,0 +1,277 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVUint16_t vuint16 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVFloat16_t vfloat16 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVBfloat16_t vbfloat16 __attribute__((arm_sve_vector_bits(256)));
+
+#define TESTS(TYPE)                                                    \
+  TYPE                                                                 \
+  TYPE##_zip1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 2, 3, 16, 17, 18, 19,  \
+                                   4, 5, 6, 7, 20, 21, 22, 23);        \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_zip2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 8, 9, 10, 11, 24, 25, 26, 27,        \
+                                   12, 13, 14, 15, 28, 29, 30, 31);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 2, 3, 16, 17, 18, 19,  \
+                                   8, 9, 10, 11, 24, 25, 26, 27);      \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 4, 5, 6, 7, 20, 21, 22, 23,  \
+                                   12, 13, 14, 15, 28, 29, 30, 31);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 2, 3, 8, 9, 10, 11,    \
+                                   16, 17, 18, 19, 24, 25, 26, 27);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 4, 5, 6, 7, 12, 13, 14, 15,  \
+                                   20, 21, 22, 23, 28, 29, 30, 31);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_zip1_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 16, 17, 2, 3, 18, 19,  \
+                                   4, 5, 20, 21, 6, 7, 22, 23);        \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_zip2_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 8, 9, 24, 25, 10, 11, 26, 27,        \
+                                   12, 13, 28, 29, 14, 15, 30, 31);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn1_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 16, 17, 4, 5, 20, 21,  \
+                                   8, 9, 24, 25, 12, 13, 28, 29);      \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn2_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 2, 3, 18, 19, 6, 7, 22, 23,  \
+                                   10, 11, 26, 27, 14, 15, 30, 31);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp1_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 4, 5, 8, 9, 12, 13,    \
+                                   16, 17, 20, 21, 24, 25, 28, 29);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp2_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 2, 3, 6, 7, 10, 11, 14, 15,  \
+                                   18, 19, 22, 23, 26, 27, 30, 31);    \
+  }
+
+/*
+** vuint16_zip1_d:
+**     zip1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vuint16_zip2_d:
+**     zip2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vuint16_trn1_d:
+**     trn1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vuint16_trn2_d:
+**     trn2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vuint16_uzp1_d:
+**     uzp1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vuint16_uzp2_d:
+**     uzp2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vuint16_zip1_s:
+**     zip1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vuint16_zip2_s:
+**     zip2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vuint16_trn1_s:
+**     trn1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vuint16_trn2_s:
+**     trn2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vuint16_uzp1_s:
+**     uzp1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vuint16_uzp2_s:
+**     uzp2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+TESTS (vuint16)
+
+/*
+** vfloat16_zip1_d:
+**     zip1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat16_zip2_d:
+**     zip2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat16_trn1_d:
+**     trn1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat16_trn2_d:
+**     trn2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat16_uzp1_d:
+**     uzp1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat16_uzp2_d:
+**     uzp2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vfloat16_zip1_s:
+**     zip1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vfloat16_zip2_s:
+**     zip2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vfloat16_trn1_s:
+**     trn1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vfloat16_trn2_s:
+**     trn2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vfloat16_uzp1_s:
+**     uzp1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vfloat16_uzp2_s:
+**     uzp2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+TESTS (vfloat16)
+
+/*
+** vbfloat16_zip1_d:
+**     zip1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vbfloat16_zip2_d:
+**     zip2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vbfloat16_trn1_d:
+**     trn1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vbfloat16_trn2_d:
+**     trn2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vbfloat16_uzp1_d:
+**     uzp1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vbfloat16_uzp2_d:
+**     uzp2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vbfloat16_zip1_s:
+**     zip1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vbfloat16_zip2_s:
+**     zip2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vbfloat16_trn1_s:
+**     trn1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vbfloat16_trn2_s:
+**     trn2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vbfloat16_uzp1_s:
+**     uzp1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vbfloat16_uzp2_s:
+**     uzp2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+TESTS (vbfloat16)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/permute_3.c b/gcc/testsuite/gcc.target/aarch64/sve/permute_3.c
new file mode 100644
index 000000000000..0a88ce0e8897
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/permute_3.c
@@ -0,0 +1,91 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVInt8_t vint8 __attribute__((arm_sve_vector_bits(256)));
+
+#define TESTS(TYPE)                                                    \
+  TYPE                                                                 \
+  TYPE##_zip1_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 2, 3, 4, 5, 6, 7,      \
+                                   32, 33, 34, 35, 36, 37, 38, 39,     \
+                                   8, 9, 10, 11, 12, 13, 14, 15,       \
+                                   40, 41, 42, 43, 44, 45, 46, 47);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_zip2_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 16, 17, 18, 19, 48, 49, 50, 51, \
+                                   20, 21, 22, 23, 52, 53, 54, 55,     \
+                                   24, 25, 26, 27, 56, 57, 58, 59,     \
+                                   28, 29, 30, 31, 60, 61, 62, 63);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn1_h (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 32, 33, 4, 5, 36, 37,  \
+                                   8, 9, 40, 41, 12, 13, 44, 45,       \
+                                   16, 17, 48, 49, 20, 21, 52, 53,     \
+                                   24, 25, 56, 57, 28, 29, 60, 61);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_trn2_d (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 8, 9, 10, 11, 12, 13, 14, 15,        \
+                                   40, 41, 42, 43, 44, 45, 46, 47,     \
+                                   24, 25, 26, 27, 28, 29, 30, 31,     \
+                                   56, 57, 58, 59, 60, 61, 62, 63);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp1_s (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 0, 1, 2, 3, 8, 9, 10, 11,    \
+                                   16, 17, 18, 19, 24, 25, 26, 27,     \
+                                   32, 33, 34, 35, 40, 41, 42, 43,     \
+                                   48, 49, 50, 51, 56, 57, 58, 59);    \
+  }                                                                    \
+                                                                       \
+  TYPE                                                                 \
+  TYPE##_uzp2_h (TYPE x, TYPE y)                                       \
+  {                                                                    \
+    return __builtin_shufflevector (x, y, 2, 3, 6, 7, 10, 11, 14, 15,  \
+                                   18, 19, 22, 23, 26, 27, 30, 31,     \
+                                   34, 35, 38, 39, 42, 43, 46, 47,     \
+                                   50, 51, 54, 55, 58, 59, 62, 63);    \
+  }
+
+/*
+** vint8_zip1_d:
+**     zip1    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint8_zip2_s:
+**     zip2    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vint8_trn1_h:
+**     trn1    z0\.h, z0\.h, z1\.h
+**     ret
+*/
+/*
+** vint8_trn2_d:
+**     trn2    z0\.d, z0\.d, z1\.d
+**     ret
+*/
+/*
+** vint8_uzp1_s:
+**     uzp1    z0\.s, z0\.s, z1\.s
+**     ret
+*/
+/*
+** vint8_uzp2_h:
+**     uzp2    z0\.h, z0\.h, z1\.h
+**     ret
+*/
+TESTS (vint8)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/permute_4.c b/gcc/testsuite/gcc.target/aarch64/sve/permute_4.c
new file mode 100644
index 000000000000..a9cad7b49fa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/permute_4.c
@@ -0,0 +1,113 @@
+/* { dg-options "-O -msve-vector-bits=256 -fgimple" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVInt8_t vint8 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVBool_t vbool __attribute__((arm_sve_vector_bits(256)));
+
+/*
+** uzp1_h:
+**     uzp1    p0\.h, p0\.h, p1\.h
+**     ret
+*/
+vbool __GIMPLE
+uzp1_h (vbool x, vbool y)
+{
+  vbool z;
+
+  z = __VEC_PERM (x, y, _Literal (vint8)
+                 { 0, 1, 4, 5, 8, 9, 12, 13,
+                   16, 17, 20, 21, 24, 25, 28, 29,
+                   32, 33, 36, 37, 40, 41, 44, 45,
+                   48, 49, 52, 53, 56, 57, 60, 61 });
+  return z;
+}
+
+/*
+** uzp2_s:
+**     uzp2    p0\.s, p0\.s, p1\.s
+**     ret
+*/
+vbool __GIMPLE
+uzp2_s (vbool x, vbool y)
+{
+  vbool z;
+
+  z = __VEC_PERM (x, y, _Literal (vint8)
+                 { 4, 5, 6, 7, 12, 13, 14, 15,
+                   20, 21, 22, 23, 28, 29, 30, 31,
+                   36, 37, 38, 39, 44, 45, 46, 47,
+                   52, 53, 54, 55, 60, 61, 62, 63 });
+  return z;
+}
+
+/*
+** trn1_d:
+**     trn1    p0\.d, p0\.d, p1\.d
+**     ret
+*/
+vbool __GIMPLE
+trn1_d (vbool x, vbool y)
+{
+  vbool z;
+
+  z = __VEC_PERM (x, y, _Literal (vint8)
+                 { 0, 1, 2, 3, 4, 5, 6, 7,
+                   32, 33, 34, 35, 36, 37, 38, 39,
+                   16, 17, 18, 19, 20, 21, 22, 23,
+                   48, 49, 50, 51, 52, 53, 54, 55 });
+  return z;
+}
+
+/*
+** trn2_h:
+**     trn2    p0\.h, p0\.h, p1\.h
+**     ret
+*/
+vbool __GIMPLE
+trn2_h (vbool x, vbool y)
+{
+  vbool z;
+
+  z = __VEC_PERM (x, y, _Literal (vint8)
+                 { 2, 3, 34, 35, 6, 7, 38, 39,
+                   10, 11, 42, 43, 14, 15, 46, 47,
+                   18, 19, 50, 51, 22, 23, 54, 55,
+                   26, 27, 58, 59, 30, 31, 62, 63 });
+  return z;
+}
+
+/*
+** zip1_d:
+**     zip1    p0\.d, p0\.d, p1\.d
+**     ret
+*/
+vbool __GIMPLE
+zip1_d (vbool x, vbool y)
+{
+  vbool z;
+
+  z = __VEC_PERM (x, y, _Literal (vint8)
+                 { 0, 1, 2, 3, 4, 5, 6, 7,
+                   32, 33, 34, 35, 36, 37, 38, 39,
+                   8, 9, 10, 11, 12, 13, 14, 15,
+                   40, 41, 42, 43, 44, 45, 46, 47 });
+  return z;
+}
+
+/*
+** zip2_s:
+**     zip2    p0\.s, p0\.s, p1\.s
+**     ret
+*/
+vbool __GIMPLE
+zip2_s (vbool x, vbool y)
+{
+  vbool z;
+
+  z = __VEC_PERM (x, y, _Literal (vint8)
+                 { 16, 17, 18, 19, 48, 49, 50, 51,
+                   20, 21, 22, 23, 52, 53, 54, 55,
+                   24, 25, 26, 27, 56, 57, 58, 59,
+                   28, 29, 30, 31, 60, 61, 62, 63 });
+  return z;
+}
