The INDEX patterns handle partial modes by choosing the container
size rather than the element size, so that the number of lanes
(and thus number of additions) matches the mode.  This means that
all VNx4 modes use .s and all VNx2 modes use .d, etc.

When adding this, I'd forgotten that the choice between Wn and Xn
registers would need to be updated to use the container size too.
For partial VNx2s, we were using .d containers with Wn rather than
Xn source registers.

Tested on aarch64-linux-gnu, applied as r279173.

Richard


2019-12-10  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
        * config/aarch64/iterators.md (vccore): New iterator.
        * config/aarch64/aarch64-sve.md (vec_series<mode>): Use it instead
        of vwcore.
        (*vec_series<mode>_plus): Likewise.

gcc/testsuite/
        * gcc.target/aarch64/sve/mixed_size_6.c: New test.

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md     2019-11-18 15:36:04.861884957 +0000
+++ gcc/config/aarch64/iterators.md     2019-12-10 16:31:31.328032388 +0000
@@ -1093,6 +1093,12 @@ (define_mode_attr vwcore [(V8QI "w") (V1
                          (VNx2DI "x")
                          (VNx2DF "x")])
 
+;; Like vwcore, but for the container mode rather than the element mode.
+(define_mode_attr vccore [(VNx16QI "w") (VNx8QI "w") (VNx4QI "w") (VNx2QI "x")
+                         (VNx8HI "w") (VNx4HI "w") (VNx2HI "x")
+                         (VNx4SI "w") (VNx2SI "x")
+                         (VNx2DI "x")])
+
 ;; Double vector types for ALLX.
 (define_mode_attr Vallxd [(QI "8b") (HI "4h") (SI "2s")])
 
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md   2019-11-16 13:31:24.342304673 +0000
+++ gcc/config/aarch64/aarch64-sve.md   2019-12-10 16:31:31.328032388 +0000
@@ -2541,9 +2541,9 @@ (define_insn "vec_series<mode>"
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
   "TARGET_SVE"
   "@
-   index\t%0.<Vctype>, #%1, %<vwcore>2
-   index\t%0.<Vctype>, %<vwcore>1, #%2
-   index\t%0.<Vctype>, %<vwcore>1, %<vwcore>2"
+   index\t%0.<Vctype>, #%1, %<vccore>2
+   index\t%0.<Vctype>, %<vccore>1, #%2
+   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
 )
 
 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
@@ -2557,7 +2557,7 @@ (define_insn "*vec_series<mode>_plus"
   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
   {
     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
-    return "index\t%0.<Vctype>, %<vwcore>1, #%2";
+    return "index\t%0.<Vctype>, %<vccore>1, #%2";
   }
 )
 
Index: gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c
===================================================================
--- /dev/null   2019-09-17 11:41:18.176664108 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/mixed_size_6.c 2019-12-10 16:31:31.328032388 +0000
@@ -0,0 +1,47 @@
+/* { dg-options "-O3 -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *restrict ptr1, uint8_t *restrict ptr2, uint8_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 1;
+    }
+}
+
+void
+f2 (uint64_t *restrict ptr1, uint16_t *restrict ptr2, uint16_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 2;
+    }
+}
+
+void
+f3 (uint64_t *restrict ptr1, uint32_t *restrict ptr2, uint32_t start)
+{
+#pragma GCC unroll 0
+  for (int i = 0; i < 4; ++i)
+    {
+      ptr1[i] = 10;
+      ptr2[i] = start;
+      start += 4;
+    }
+}
+
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.d, x[0-9]+, #4\n} } } */
+
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #1\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #2\n} } } */
+/* { dg-final { scan-assembler-not {\tindex\tz[0-9]+\.d, w[0-9]+, #4\n} } } */

Reply via email to