The vectorizer thinks it can align a vector access to 16 bytes by
peeling when using a vectorization factor of 8 and 1 byte elements.
That of course does not work for the 2nd vector iteration, because
each vector iteration advances the access by only 8 bytes.
Apparently we lack a guard against such nonsense.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

        PR tree-optimization/118749
        * tree-vect-data-refs.cc (vector_alignment_reachable_p): Pass
        in the vectorization factor; when that cannot maintain the
        DR's target alignment, do not claim we can reach it by
        peeling.
        (vect_enhance_data_refs_alignment): Adjust caller.

        * gcc.dg/vect/pr118749.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr118749.c | 41 ++++++++++++++++++++++++++++
 gcc/tree-vect-data-refs.cc           | 24 ++++++++++------
 2 files changed, 57 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr118749.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr118749.c b/gcc/testsuite/gcc.dg/vect/pr118749.c
new file mode 100644
index 00000000000..eed8bd0d7e0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118749.c
@@ -0,0 +1,41 @@
+/* { dg-additional-options "-mtune=pentium4" { target ia32 } } */
+
+#include "tree-vect.h"
+
+typedef unsigned char FcChar8;
+typedef unsigned short FcChar16;
+typedef unsigned int FcChar32;
+typedef int FcBool;
+
+#define FcFalse 0
+#define FcTrue 1
+#define FcDontCare 2
+
+__attribute__((noipa)) /* noipa: no interprocedural optimization with callers.  */
+static FcBool
+FcLooksLikeSJIS (FcChar8 *string, int len) /* Classifies LEN bytes at STRING.  */
+{
+    int     nhigh = 0, nlow = 0; /* Bytes with / without bit 0x80 set.  */
+
+    while (len-- > 0) /* Loop over 1-byte elements, one byte per iteration.  */
+    {
+        if (*string++ & 0x80) nhigh++;
+        else nlow++;
+    }
+    /*
+     * Heuristic -- more than 1/3 of the bytes have the high-bit set exactly
+     * when nhigh * 2 > nlow; then this is likely to be SJIS and not ROMAN
+     */
+    if (nhigh * 2 > nlow)
+        return FcTrue;
+    return FcFalse;
+}
+
+int main() /* Aborts if an all-ASCII string is misclassified as SJIS.  */
+{
+  check_vect (); /* Presumably provided by tree-vect.h.  */
+  unsigned char* s = "DejaVuMathTeXGyre-Regulardtd!"; /* 29 bytes, none >= 0x80.  */
+  if (FcLooksLikeSJIS(s, 29)) /* nhigh is 0 here, so FcFalse is expected.  */
+    abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 6eda40267bd..6d5854ac7c7 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1722,31 +1722,37 @@ not_size_aligned (tree exp)
    a few loop iterations.  Return false otherwise.  */
 
 static bool
-vector_alignment_reachable_p (dr_vec_info *dr_info)
+vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf)
 {
   stmt_vec_info stmt_info = dr_info->stmt;
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
+  poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+  unsigned elem_size = vector_element_size (vector_size, nelements);
+  unsigned group_size = 1;
 
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
       /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
-      int elem_size, mis_in_elements;
 
       /* FORNOW: handle only known alignment.  */
       if (!known_alignment_for_access_p (dr_info, vectype))
        return false;
 
-      poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
-      poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
-      elem_size = vector_element_size (vector_size, nelements);
-      mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
-
+      unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size;
       if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
        return false;
+
+      group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
     }
 
+  /* If the vectorization factor does not guarantee DR advancement of
+     a multiple of the target alignment no peeling will help.  */
+  if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info)))
+    return false;
+
   /* If misalignment is known at the compile time then allow peeling
      only if natural alignment is reachable through peeling.  */
   if (known_alignment_for_access_p (dr_info, vectype)
@@ -2346,7 +2352,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
       stmt_vec_info stmt_info = dr_info->stmt;
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-      do_peeling = vector_alignment_reachable_p (dr_info);
+      do_peeling
+       = vector_alignment_reachable_p (dr_info,
+                                       LOOP_VINFO_VECT_FACTOR (loop_vinfo));
       if (do_peeling)
         {
          if (known_alignment_for_access_p (dr_info, vectype))
-- 
2.43.0

Reply via email to