https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79824
--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
Ok, so in the SLP case this is actually safe because we only ever load vectors
that end up having at least a single element used (we do not load from unused
gaps).  In the non-SLP case I botched up the patch: plain interleaving of a
single-element group is rejected for gaps bigger than the vector size (whoo!),
and with multiple elements I failed to properly use 'first_stmt' for the
alignment test, so we always get peeling for gaps.
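
(For illustration only, my own sketch rather than anything from the testsuite:
a single-element group whose gap exceeds the vector width looks like the
function below.  With 4-element int vectors, e.g. SSE2 on x86_64, the gap of 7
is bigger than the vector size, which is the rejected plain-interleaving case
mentioned above.)

void __attribute__((noinline))
baz (int *__restrict a, int *__restrict b)
{
  /* Each iteration loads one element and skips the next seven: group
     size 8, gap 7.  When the gap is larger than the vector size, plain
     (non-SLP) interleaving rejects the group.  */
  for (int n = 0; n < 1024; n++)
    a[n] = b[n * 8];
}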
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 245908)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -1796,7 +1796,8 @@ get_group_load_store_type (gimple *stmt,
       bool would_overrun_p = (gap != 0);
       /* If the access is aligned an overrun is fine.  */
       if (would_overrun_p
-          && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
+          && aligned_access_p
+               (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
         would_overrun_p = false;
       if (!STMT_VINFO_STRIDED_P (stmt_info)
           && (can_overrun_p || !would_overrun_p)
makes
void __attribute__((noinline))
foo (TYPE *__restrict a, TYPE *__restrict b)
{
  int n;
  b = __builtin_assume_aligned (b, sizeof (TYPE) * 2);
  a = __builtin_assume_aligned (a, sizeof (TYPE) * 2);
  for (n = 0; n < COUNT; n++)
    {
      a[n] = b[n * 4] + b[n * 4 + 1];
    }
}
fail on x86_64. So I am testing
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 245908)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -1731,7 +1731,7 @@ get_group_load_store_type (gimple *stmt,
   bool single_element_p = (stmt == first_stmt
                            && !GROUP_NEXT_ELEMENT (stmt_info));
   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
-  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
   /* True if the vectorized statements would access beyond the last
      statement in the group.  */
@@ -1794,9 +1794,13 @@ get_group_load_store_type (gimple *stmt,
       /* If there is a gap at the end of the group then these optimizations
          would access excess elements in the last iteration.  */
       bool would_overrun_p = (gap != 0);
-      /* If the access is aligned an overrun is fine.  */
+      /* If the access is aligned an overrun is fine, but only if the
+         overrun is not inside an unused vector (if the gap is as large
+         or larger than a vector).  */
       if (would_overrun_p
-          && aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
+          && gap < nunits
+          && aligned_access_p
+               (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
         would_overrun_p = false;
       if (!STMT_VINFO_STRIDED_P (stmt_info)
           && (can_overrun_p || !would_overrun_p)
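
For reference, the testcase above can be exercised standalone with assumed
values TYPE = double and COUNT = 256 (the real harness may define them
differently).  If I read GROUP_GAP right, the group {b[4n], b[4n+1]} has group
size 4 and gap 2, and with 2-element double vectors on x86_64 gap < nunits
does not hold, so the patched check keeps peeling for gaps even though b is
vector-aligned:

#include <stdlib.h>

#define TYPE double
#define COUNT 256

void __attribute__((noinline))
foo (TYPE *__restrict a, TYPE *__restrict b)
{
  int n;
  b = __builtin_assume_aligned (b, sizeof (TYPE) * 2);
  a = __builtin_assume_aligned (a, sizeof (TYPE) * 2);
  for (n = 0; n < COUNT; n++)
    {
      a[n] = b[n * 4] + b[n * 4 + 1];
    }
}

int
main ()
{
  /* Aligned buffers so __builtin_assume_aligned holds; b covers all
     4 * COUNT elements touched by the grouped load.  */
  TYPE *a = aligned_alloc (sizeof (TYPE) * 2, COUNT * sizeof (TYPE));
  TYPE *b = aligned_alloc (sizeof (TYPE) * 2, 4 * COUNT * sizeof (TYPE));
  for (int i = 0; i < 4 * COUNT; i++)
    b[i] = i;
  foo (a, b);
  for (int n = 0; n < COUNT; n++)
    if (a[n] != b[n * 4] + b[n * 4 + 1])
      abort ();
  return 0;
}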