With -msve-vector-bits=N, the payload of some partial SVE modes can
be 16 bytes or smaller, which makes them small enough to fit in a
pair of GPRs.  We specifically don't want that, because the payload
is distributed evenly across the SVE register rather than collected
at one end.  Marshalling it into a GPR via register operations would
be expensive.
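
As a rough illustration (a sketch only, not part of the patch): assuming
-msve-vector-bits=256, a partial mode such as VNx2SImode keeps each
32-bit element in (on little-endian) the low half of a 64-bit container,
so its 16 bytes of payload sit at 8-byte intervals across the 32-byte
register rather than being packed into the low 16 bytes.  The standalone
program below just prints that assumed layout; the constants are
illustrative, not taken from the compiler sources:

/* Sketch only: print the assumed payload layout of VNx2SImode with
   256-bit SVE vectors.  */
#include <stdio.h>

int
main (void)
{
  const unsigned int reg_bytes = 32;  /* 256-bit SVE register.  */
  const unsigned int container = 8;   /* Assumed 64-bit container per element.  */
  const unsigned int elt_bytes = 4;   /* 32-bit payload per container.  */

  for (unsigned int offset = 0; offset < reg_bytes; offset += container)
    printf ("payload bytes %2u-%2u\n", offset, offset + elt_bytes - 1);
  return 0;
}

The payload bytes land at 0-3, 8-11, 16-19 and 24-27, which is why
gathering them into a GPR pair would need per-element register
operations.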

Tested on aarch64-linux-gnu, applied as r279174.

Richard


2019-12-10  Richard Sandiford  <richard.sandif...@arm.com>

gcc/
        * config/aarch64/aarch64.c (aarch64_hard_regno_mode_ok): Don't
        allow SVE modes in GPRs.

gcc/testsuite/
        * gcc.target/aarch64/sve/mixed_size_7.c: New test.

Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c        2019-12-05 14:20:17.409060413 +0000
+++ gcc/config/aarch64/aarch64.c        2019-12-10 16:45:42.794317637 +0000
@@ -2019,9 +2019,11 @@ aarch64_hard_regno_mode_ok (unsigned reg
 
   if (GP_REGNUM_P (regno))
     {
+      if (vec_flags & VEC_ANY_SVE)
+       return false;
       if (known_le (GET_MODE_SIZE (mode), 8))
        return true;
-      else if (known_le (GET_MODE_SIZE (mode), 16))
+      if (known_le (GET_MODE_SIZE (mode), 16))
        return (regno & 1) == 0;
     }
   else if (FP_REGNUM_P (regno))
Index: gcc/testsuite/gcc.target/aarch64/sve/mixed_size_7.c
===================================================================
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mixed_size_7.c 2019-12-10 16:45:42.794317637 +0000
@@ -0,0 +1,28 @@
+/* Originally gcc.dg/vect/bb-slp-6.c */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -fno-vect-cost-model" } */
+
+#define N 16
+
+unsigned int out[N];
+unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+
+__attribute__ ((noinline)) int
+main1 (unsigned int x, unsigned int y)
+{
+  int i;
+  unsigned int *pin = &in[0];
+  unsigned int *pout = &out[0];
+  unsigned int a0, a1, a2, a3;
+
+  a0 = *pin++ + 23;
+  a1 = *pin++ + 142;
+  a2 = *pin++ + 2;
+  a3 = *pin++ + 31;
+
+  *pout++ = a0 * x;
+  *pout++ = a1 * y;
+  *pout++ = a2 * x;
+  *pout++ = a3 * y;
+
+  return 0;
+}