When adding the partial SVE modes, I remembered to handle reloads
in a similar way to full big-endian SVE vectors, but forgot the
just-as-important mode-change (CAN_CHANGE_MODE_CLASS) rules.
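
For the record, the failure mode is easiest to see through a subreg
between a full SVE mode and a partial one.  A partial SVE mode keeps
each element in a wider container, so its data are distributed evenly
across the register rather than clustered into its low bits, and a
plain bit-for-bit reinterpretation therefore reads the wrong lanes.
A minimal sketch of the kind of source that exposes this (assuming
-msve-vector-bits=512, where the 256-bit type is expected to end up
in a .d-container partial mode; it is essentially the new test below
minus the arithmetic):

    typedef int int32x16_t __attribute__((vector_size(64)));
    typedef int int32x8_t __attribute__((vector_size(32)));

    /* Taking the low half of the full vector is not a cheap subreg
       here: the int32x8_t elements live one per 64-bit container,
       spread across the whole register.  */
    int32x8_t
    low_half (int32x16_t x)
    {
      union { int32x16_t full; int32x8_t pair[2]; } u = { .full = x };
      return u.pair[0];
    }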

Tested on aarch64-linux-gnu and applied as r279572.

Richard


2019-12-19  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * config/aarch64/aarch64.c (aarch64_can_change_mode_class):
        Don't allow changes between partial SVE modes and other kinds
        of mode.  Don't allow changes between two partial SVE modes
        if they have different container or element sizes.

gcc/testsuite/
        * gcc.target/aarch64/sve/mixed_size_8.c: New test.

Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c        2019-12-19 13:24:59.093288717 +0000
+++ gcc/config/aarch64/aarch64.c        2019-12-19 13:35:47.816959437 +0000
@@ -21457,11 +21457,30 @@ aarch64_compute_pressure_classes (reg_cl
 aarch64_can_change_mode_class (machine_mode from,
                               machine_mode to, reg_class_t)
 {
+  unsigned int from_flags = aarch64_classify_vector_mode (from);
+  unsigned int to_flags = aarch64_classify_vector_mode (to);
+
+  bool from_sve_p = (from_flags & VEC_ANY_SVE);
+  bool to_sve_p = (to_flags & VEC_ANY_SVE);
+
+  bool from_partial_sve_p = from_sve_p && (from_flags & VEC_PARTIAL);
+  bool to_partial_sve_p = to_sve_p && (to_flags & VEC_PARTIAL);
+
+  /* Don't allow changes between partial SVE modes and other modes.
+     The contents of partial SVE modes are distributed evenly across
+     the register, whereas GCC expects them to be clustered together.  */
+  if (from_partial_sve_p != to_partial_sve_p)
+    return false;
+
+  /* Similarly reject changes between partial SVE modes that have
+     different patterns of significant and insignificant bits.  */
+  if (from_partial_sve_p
+      && (aarch64_sve_container_bits (from) != aarch64_sve_container_bits (to)
+         || GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to)))
+    return false;
+
   if (BYTES_BIG_ENDIAN)
     {
-      bool from_sve_p = aarch64_sve_data_mode_p (from);
-      bool to_sve_p = aarch64_sve_data_mode_p (to);
-
       /* Don't allow changes between SVE data modes and non-SVE modes.
         See the comment at the head of aarch64-sve.md for details.  */
       if (from_sve_p != to_sve_p)
Index: gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c
===================================================================
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mixed_size_8.c 2019-12-19 13:35:47.816959437 +0000
@@ -0,0 +1,34 @@
+/* { dg-options "-O2 -msve-vector-bits=512" } */
+
+typedef int int32x16_t __attribute__((vector_size(64)));
+typedef int int32x8_t __attribute__((vector_size(32)));
+
+int32x8_t
+f1 (int32x16_t x)
+{
+  union u { int32x16_t full; int32x8_t pair[2]; } u;
+  u.full = x | 2;
+  return u.pair[0] + (int32x8_t) { 1, 2, 3, 4, 5, 6, 7, 8 };
+}
+
+int32x8_t
+f2 (int32x16_t x)
+{
+  union u { int32x16_t full; int32x8_t pair[2]; } u;
+  u.full = x | 2;
+  return u.pair[1] + (int32x8_t) { 1, 2, 3, 4, 5, 6, 7, 8 };
+}
+
+/* We could do something more efficient than spill the int32x16_t and
+   reload the int32x8_t.  The important thing is that we don't do
+   something like:
+
+       orr     z0.s, z0.s, #2
+       index   z1.d, #1, #1
+       add     z0.s, z0.s, z1.s
+       st1w    z0.d, p0, [x8]
+
+   We're supposed to add z1 to one half of the ORR result instead.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d} 2 } } */
