Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk so far.
Richard.

2017-09-06  Richard Biener  <rguent...@suse.de>

	PR tree-optimization/82108
	* tree-vect-stmts.c (vectorizable_load): Fix pointer adjustment
	for gap in the non-permutation SLP case.

	* gcc.dg/vect/pr82108.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c (revision 251642)
+++ gcc/tree-vect-stmts.c (working copy)
@@ -7203,7 +7203,6 @@ vectorizable_load (gimple *stmt, gimple_
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
-      int group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
       /* For SLP vectorization we directly vectorize a subchain
          without permutation. */
       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
@@ -7246,7 +7245,8 @@ vectorizable_load (gimple *stmt, gimple_
           else
             {
               vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-              group_gap_adj = group_gap;
+              group_gap_adj
+                = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
             }
         }
       else
Index: gcc/testsuite/gcc.dg/vect/pr82108.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr82108.c (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr82108.c (working copy)
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_float } */
+
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+downscale_2 (const float* src, int src_n, float* dst)
+{
+  int i;
+
+  for (i = 0; i < src_n; i += 2) {
+    const float* a = src;
+    const float* b = src + 4;
+
+    dst[0] = (a[0] + b[0]) / 2;
+    dst[1] = (a[1] + b[1]) / 2;
+    dst[2] = (a[2] + b[2]) / 2;
+    dst[3] = (a[3] + b[3]) / 2;
+
+    src += 2 * 4;
+    dst += 4;
+  }
+}
+
+int main ()
+{
+  const float in[4 * 4] = {
+    1, 2, 3, 4,
+    5, 6, 7, 8,
+
+    1, 2, 3, 4,
+    5, 6, 7, 8
+  };
+  float out[2 * 4];
+
+  check_vect ();
+
+  downscale_2 (in, 4, out);
+
+  if (out[0] != 3 || out[1] != 4 || out[2] != 5 || out[3] != 6
+      || out[4] != 3 || out[5] != 4 || out[6] != 5 || out[7] != 6)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */