When AVX512 uses a fully masked loop and peeling we fail to create the
correct initial loop mask when the mask is composed of multiple
components in some cases.  The following fixes this by properly applying
the bias for the component to the shift amount.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

        PR tree-optimization/115843
        * tree-vect-loop-manip.cc
        (vect_set_loop_condition_partial_vectors_avx512): Properly
        bias the shift of the initial mask for alignment peeling.

        * gcc.dg/vect/pr115843.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr115843.c | 40 ++++++++++++++++++++++++++++
 gcc/tree-vect-loop-manip.cc          |  8 ++++--
 2 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr115843.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr115843.c b/gcc/testsuite/gcc.dg/vect/pr115843.c
new file mode 100644
index 00000000000..f829d90b1ad
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr115843.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options "-mavx512vl --param vect-partial-vector-usage=2" { target { avx512f_runtime && avx512vl } } } */
+
+#include "tree-vect.h"
+
+typedef __UINT64_TYPE__ BITBOARD;
+BITBOARD KingPressureMask1[64], KingSafetyMask1[64];
+
+void __attribute__((noinline))
+foo()
+{
+  for (int i = 0; i < 64; i++)
+    {
+      if ((i & 7) == 0)
+       KingPressureMask1[i] = KingSafetyMask1[i + 1];
+      else if ((i & 7) == 7)
+       KingPressureMask1[i] = KingSafetyMask1[i - 1];
+      else
+       KingPressureMask1[i] = KingSafetyMask1[i];
+    }
+}
+
+BITBOARD verify[64]
+  = {1, 1, 2, 3, 4, 5, 6, 6, 9, 9, 10, 11, 12, 13, 14, 14, 17, 17, 18, 19,
+    20, 21, 22, 22, 25, 25, 26, 27, 28, 29, 30, 30, 33, 33, 34, 35, 36, 37, 38,
+    38, 41, 41, 42, 43, 44, 45, 46, 46, 49, 49, 50, 51, 52, 53, 54, 54, 57, 57,
+    58, 59, 60, 61, 62, 62};
+
+int main()
+{
+  check_vect ();
+
+#pragma GCC novect
+  for (int i = 0; i < 64; ++i)
+    KingSafetyMask1[i] = i;
+  foo ();
+  for (int i = 0; i < 64; ++i)
+    if (KingPressureMask1[i] != verify[i])
+      __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index ac13873cd88..57dbcbe862c 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1149,10 +1149,14 @@ vect_set_loop_condition_partial_vectors_avx512 (class loop *loop,
              /* ???  But when the shift amount isn't constant this requires
                 a round-trip to GRPs.  We could apply the bias to either
                 side of the compare instead.  */
-             tree shift = gimple_build (&preheader_seq, MULT_EXPR,
+             tree shift = gimple_build (&preheader_seq, MINUS_EXPR,
                                         TREE_TYPE (niters_skip), niters_skip,
                                         build_int_cst (TREE_TYPE (niters_skip),
-                                                       rgc.max_nscalars_per_iter));
+                                                       bias));
+             shift = gimple_build (&preheader_seq, MULT_EXPR,
+                                   TREE_TYPE (niters_skip), shift,
+                                   build_int_cst (TREE_TYPE (niters_skip),
+                                                  rgc.max_nscalars_per_iter));
              init_ctrl = gimple_build (&preheader_seq, LSHIFT_EXPR,
                                        TREE_TYPE (init_ctrl),
                                        init_ctrl, shift);
-- 
2.35.3

Reply via email to