This fixes an oversight in the vectorization of strided permuted SLP loads, where the number of required loads was miscomputed.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2016-01-12  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/69174
        * tree-vect-stmts.c (vect_mark_relevant): Remove excessive vertical
        space.
        (vectorizable_load): Properly compute the number of loads needed
        for permuted strided SLP loads and do not spuriously assign to
        SLP_TREE_VEC_STMTS.

        * gcc.dg/torture/pr69174.c: New testcase.

Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 232213)
--- gcc/tree-vect-stmts.c (working copy)
*************** vect_mark_relevant (vec<gimple *> *workl
*** 190,197 ****
    gimple *pattern_stmt;
  
    if (dump_enabled_p ())
!     dump_printf_loc (MSG_NOTE, vect_location,
!                      "mark relevant %d, live %d.\n", relevant, live_p);
  
    /* If this stmt is an original stmt in a pattern, we might need to mark its
       related pattern stmt instead of the original stmt. However, such stmts
--- 190,200 ----
    gimple *pattern_stmt;
  
    if (dump_enabled_p ())
!     {
!       dump_printf_loc (MSG_NOTE, vect_location,
!                        "mark relevant %d, live %d: ", relevant, live_p);
!       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
!     }
  
    /* If this stmt is an original stmt in a pattern, we might need to mark its
       related pattern stmt instead of the original stmt. However, such stmts
*************** vectorizable_load (gimple *stmt, gimple_
*** 6748,6756 ****
                else
                  ltype = vectype;
                ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
!               ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
                if (slp_perm)
!                 dr_chain.create (ncopies);
              }
            for (j = 0; j < ncopies; j++)
              {
--- 6751,6766 ----
                else
                  ltype = vectype;
                ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
!               /* For SLP permutation support we need to load the whole group,
!                  not only the number of vector stmts the permutation result
!                  fits in. */
                if (slp_perm)
!                 {
!                   ncopies = (group_size * vf + nunits - 1) / nunits;
!                   dr_chain.create (ncopies);
!                 }
!               else
!                 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              }
            for (j = 0; j < ncopies; j++)
              {
*************** vectorizable_load (gimple *stmt, gimple_
*** 6798,6806 ****
  
                if (slp)
                  {
-                   SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                    if (slp_perm)
                      dr_chain.quick_push (gimple_assign_lhs (new_stmt));
                  }
                else
                  {
--- 6808,6817 ----
  
                if (slp)
                  {
                    if (slp_perm)
                      dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+                   else
+                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                  }
                else
                  {
Index: gcc/testsuite/gcc.dg/torture/pr69174.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr69174.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr69174.c (working copy)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ 
+ typedef int pixval;
+ typedef struct { pixval r, g, b; } xel;
+ int convertRow_sample, convertRaster_col;
+ short *convertRow_samplebuf;
+ xel *convertRow_xelrow;
+ short convertRow_spp;
+ void fn1() {
+   int *alpharow;
+   for (; convertRaster_col;
+        ++convertRaster_col, convertRow_sample += convertRow_spp) {
+     convertRow_xelrow[convertRaster_col].r =
+         convertRow_xelrow[convertRaster_col].g =
+             convertRow_xelrow[convertRaster_col].b =
+                 convertRow_samplebuf[convertRow_sample];
+     alpharow[convertRaster_col] = convertRow_samplebuf[convertRow_sample + 3];
+   }
+ }