Well, with this patch only those with no gaps in the groups are handled. More will come in follow-up patches.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-05-06 Richard Biener <[email protected]> * tree-vect-slp.c (vect_supported_load_permutation_p): Use vect_transform_slp_perm_load to check if we support a permutation for basic-block vectorization. * gcc.dg/vect/bb-slp-34.c: New testcase. Index: gcc/tree-vect-slp.c =================================================================== *** gcc/tree-vect-slp.c (revision 222758) --- gcc/tree-vect-slp.c (working copy) *************** vect_supported_load_permutation_p (slp_i *** 1313,1329 **** FORNOW: not supported in loop SLP because of realignment compications. */ if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt))) { ! /* Check that for every node in the instance the loads ! form a subchain. */ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) { next_load = NULL; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load) { if (j != 0 && next_load != load) ! return false; next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); } } /* Check that the alignment of the first load in every subchain, i.e., --- 1313,1348 ---- FORNOW: not supported in loop SLP because of realignment compications. */ if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt))) { ! /* Check whether the loads in an instance form a subchain and thus ! no permutation is necessary. */ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) { + bool subchain_p = true; next_load = NULL; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load) { if (j != 0 && next_load != load) ! { ! subchain_p = false; ! break; ! } next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); } + if (subchain_p) + SLP_TREE_LOAD_PERMUTATION (node).release (); + else + { + /* Verify the permutation can be generated. 
*/ + vec<tree> tem; + if (!vect_transform_slp_perm_load (node, tem, NULL, + 1, slp_instn, true)) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "unsupported load permutation\n"); + return false; + } + } } /* Check that the alignment of the first load in every subchain, i.e., *************** vect_supported_load_permutation_p (slp_i *** 1352,1360 **** } } - /* We are done, no actual permutations need to be generated. */ - FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) - SLP_TREE_LOAD_PERMUTATION (node).release (); return true; } --- 1371,1376 ---- Index: gcc/testsuite/gcc.dg/vect/bb-slp-34.c =================================================================== *** gcc/testsuite/gcc.dg/vect/bb-slp-34.c (revision 0) --- gcc/testsuite/gcc.dg/vect/bb-slp-34.c (working copy) *************** *** 0 **** --- 1,37 ---- + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" + + extern void abort (void); + + int a[8], b[8]; + + void __attribute__((noinline,noclone)) + foo(void) + { + a[0] = b[3]; + a[1] = b[2]; + a[2] = b[1]; + a[3] = b[0]; + a[4] = b[2]; + a[5] = b[3]; + a[6] = b[4]; + a[7] = b[5]; + } + + int main() + { + int i; + check_vect (); + for (i = 0; i < 8; ++i) + b[i] = i; + foo (); + if (a[0] != 3 || a[1] != 2 || a[2] != 1 || a[3] != 0 + || a[4] != 2 || a[5] != 3 || a[6] != 4 || a[7] != 5) + abort (); + return 0; + } + + /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */ + /* { dg-final { cleanup-tree-dump "slp1" } } */ + /* { dg-final { cleanup-tree-dump "slp2" } } */
