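The vectorizer thinks it can align a vector access to 16 bytes by peeling when using a vectorization factor of 8 with 1-byte elements. That of course cannot work: each vector iteration advances the access by only 8 bytes, so even if the first vector iteration is 16-byte aligned, the 2nd one is not. Apparently we lack a guard against such nonsense.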
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. PR tree-optimization/118749 * tree-vect-data-refs.cc (vector_alignment_reachable_p): Pass in the vectorization factor, when that cannot maintain the DRs target alignment do not claim we can reach that by peeling. * gcc.dg/vect/pr118749.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr118749.c | 41 ++++++++++++++++++++++++++++ gcc/tree-vect-data-refs.cc | 24 ++++++++++------ 2 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr118749.c diff --git a/gcc/testsuite/gcc.dg/vect/pr118749.c b/gcc/testsuite/gcc.dg/vect/pr118749.c new file mode 100644 index 00000000000..eed8bd0d7e0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr118749.c @@ -0,0 +1,41 @@ +/* { dg-additional-options "-mtune=pentium4" { target ia32 } } */ + +#include "tree-vect.h" + +typedef unsigned char FcChar8; +typedef unsigned short FcChar16; +typedef unsigned int FcChar32; +typedef int FcBool; + +#define FcFalse 0 +#define FcTrue 1 +#define FcDontCare 2 + +__attribute__((noipa)) +static FcBool +FcLooksLikeSJIS (FcChar8 *string, int len) +{ + int nhigh = 0, nlow = 0; + + while (len-- > 0) + { + if (*string++ & 0x80) nhigh++; + else nlow++; + } + /* + * Heuristic -- if more than 1/3 of the bytes have the high-bit set, + * this is likely to be SJIS and not ROMAN + */ + if (nhigh * 2 > nlow) + return FcTrue; + return FcFalse; +} + +int main() +{ + check_vect (); + unsigned char* s = "DejaVuMathTeXGyre-Regulardtd!"; + if (FcLooksLikeSJIS(s, 29)) + abort (); + return 0; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 6eda40267bd..6d5854ac7c7 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -1722,31 +1722,37 @@ not_size_aligned (tree exp) a few loop iterations. Return false otherwise. 
*/ static bool -vector_alignment_reachable_p (dr_vec_info *dr_info) +vector_alignment_reachable_p (dr_vec_info *dr_info, poly_uint64 vf) { stmt_vec_info stmt_info = dr_info->stmt; tree vectype = STMT_VINFO_VECTYPE (stmt_info); + poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype); + poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); + unsigned elem_size = vector_element_size (vector_size, nelements); + unsigned group_size = 1; if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) { /* For interleaved access we peel only if number of iterations in the prolog loop ({VF - misalignment}), is a multiple of the number of the interleaved accesses. */ - int elem_size, mis_in_elements; /* FORNOW: handle only known alignment. */ if (!known_alignment_for_access_p (dr_info, vectype)) return false; - poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype); - poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); - elem_size = vector_element_size (vector_size, nelements); - mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size; - + unsigned mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size; if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info))) return false; + + group_size = DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); } + /* If the vectorization factor does not guarantee DR advancement of + a multiple of the target alignment no peeling will help. */ + if (!multiple_p (elem_size * group_size * vf, dr_target_alignment (dr_info))) + return false; + /* If misalignment is known at the compile time then allow peeling only if natural alignment is reachable through peeling. 
*/ if (known_alignment_for_access_p (dr_info, vectype) @@ -2346,7 +2352,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) stmt_vec_info stmt_info = dr_info->stmt; tree vectype = STMT_VINFO_VECTYPE (stmt_info); - do_peeling = vector_alignment_reachable_p (dr_info); + do_peeling + = vector_alignment_reachable_p (dr_info, + LOOP_VINFO_VECT_FACTOR (loop_vinfo)); if (do_peeling) { if (known_alignment_for_access_p (dr_info, vectype)) -- 2.43.0