Hi!

The following testcases are miscompiled, because expand_vec_perm_pshufb
incorrectly thinks it can use vpshufb instruction for the permutations
when it can't.
The
          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
intra-lane check which is correct has been copied and adjusted for 64-byte
modes into:
          if (vmode == V64QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 4))
                  return false;
            }
which is not correct, because 64-byte modes have 4 lanes rather than just
two and the above is only testing that the permutation grabs even lane elts
from even lanes and odd lane elts from odd lanes, but not that they are
from the same 256-bit half.

The following patch fixes it by using 3 * nelt / 4 instead of nelt / 4,
so we actually check the most significant 2 bits rather than just one.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
release branches?

2020-04-07  Jakub Jelinek  <ja...@redhat.com>

        PR target/94509
        * config/i386/i386-expand.c (expand_vec_perm_pshufb): Fix the check
        for inter-lane permutation for 64-byte modes.

        * gcc.target/i386/avx512bw-pr94509-1.c: New test.
        * gcc.target/i386/avx512bw-pr94509-2.c: New test.

--- gcc/config/i386/i386-expand.c.jj    2020-04-07 08:27:13.770891167 +0200
+++ gcc/config/i386/i386-expand.c       2020-04-07 10:40:09.754089204 +0200
@@ -16781,7 +16781,7 @@ expand_vec_perm_pshufb (struct expand_ve
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
-               if ((d->perm[i] ^ i) & (nelt / 4))
+               if ((d->perm[i] ^ i) & (3 * nelt / 4))
                  return false;
            }
        }
--- gcc/testsuite/gcc.target/i386/avx512bw-pr94509-1.c.jj       2020-04-07 
10:42:58.560566492 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr94509-1.c  2020-04-07 
10:44:44.010990594 +0200
@@ -0,0 +1,30 @@
+/* PR target/94509 */
+/* { dg-do run { target avx512bw } } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+#define AVX512BW
+#include "avx512f-helper.h"
+
+typedef unsigned short __attribute__ ((__vector_size__ (64))) V;
+
+__attribute__((noipa)) V
+foo (V x)
+{
+  return __builtin_shuffle (x, (V) { 0, 0, 0, 0, 0, 0, 0, 0,
+                                    15, 15, 15, 15, 15, 15, 15, 15,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    15, 15, 15, 15, 15, 15, 15, 15 });
+}
+
+static void
+TEST (void)
+{
+  V v = foo ((V) { 1, 2, 3, 4, 5, 6, 7, 8,
+                  9, 10, 11, 12, 13, 14, 15, 16,
+                  17, 18, 19, 20, 21, 22, 23, 24,
+                  25, 26, 27, 28, 29, 30, 31, 32 });
+  unsigned int i;
+  for (i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+    if (v[i] != ((i & 8) ? 16 : 1))
+      abort ();
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr94509-2.c.jj       2020-04-07 
10:43:01.739518984 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr94509-2.c  2020-04-07 
10:44:49.811903904 +0200
@@ -0,0 +1,38 @@
+/* PR target/94509 */
+/* { dg-do run { target avx512bw } } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+#define AVX512BW
+#include "avx512f-helper.h"
+
+typedef unsigned char __attribute__ ((__vector_size__ (64))) V;
+
+__attribute__((noipa)) V
+foo (V x)
+{
+  return __builtin_shuffle (x, (V) { 0, 1, 0, 1, 0, 1, 0, 1,
+                                    0, 1, 0, 1, 0, 1, 0, 1,
+                                    30, 31, 30, 31, 30, 31, 30, 31,
+                                    30, 31, 30, 31, 30, 31, 30, 31,
+                                    0, 1, 0, 1, 0, 1, 0, 1,
+                                    0, 1, 0, 1, 0, 1, 0, 1,
+                                    30, 31, 30, 31, 30, 31, 30, 31,
+                                    30, 31, 30, 31, 30, 31, 30, 31 });
+}
+
+static void
+TEST (void)
+{
+  V v = foo ((V) { 1, 2, 3, 4, 5, 6, 7, 8,
+                  9, 10, 11, 12, 13, 14, 15, 16,
+                  17, 18, 19, 20, 21, 22, 23, 24,
+                  25, 26, 27, 28, 29, 30, 31, 32,
+                  33, 34, 35, 36, 37, 38, 39, 40,
+                  41, 42, 43, 44, 45, 46, 47, 48,
+                  49, 50, 51, 52, 53, 54, 55, 56,
+                  57, 58, 59, 60, 61, 62, 63, 64 });
+  unsigned int i;
+  for (i = 0; i < sizeof (v) / sizeof (v[0]); i++)
+    if (v[i] != ((i & 16) ? 31 : 1) + (i & 1))
+      abort ();
+}

        Jakub

Reply via email to