When we do SLP discovery of a .MASK_LOAD for a dataref group with gaps, the discovery for the mask will have gaps as well, and this was unexpected in a few places. The following reorganizes things slightly to accommodate this.
Bootstrap and regtest running on x86_64-unknown-linux-gnu. PR tree-optimization/117484 * tree-vect-slp.cc (vect_build_slp_tree_2): Handle gaps in mask discovery. Fix condition to release the load permutation. (vect_lower_load_permutations): Assert we get no load permutation for the unpermuted node. * gcc.dg/vect/pr117484-1.c: New testcase. * gcc.dg/vect/pr117484-2.c: Likewise. --- gcc/testsuite/gcc.dg/vect/pr117484-1.c | 13 +++++++++++++ gcc/testsuite/gcc.dg/vect/pr117484-2.c | 16 ++++++++++++++++ gcc/tree-vect-slp.cc | 22 +++++++++++++--------- 3 files changed, 42 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr117484-2.c diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-1.c b/gcc/testsuite/gcc.dg/vect/pr117484-1.c new file mode 100644 index 00000000000..453556c50f9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117484-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ + +extern int a; +extern short b[]; +extern signed char c[], d[]; +int main() +{ + for (long j = 3; j < 1024; j += 3) + if (c[j] ? b[j] : 0) { + b[j] = d[j - 2]; + a = d[j]; + } +} diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-2.c b/gcc/testsuite/gcc.dg/vect/pr117484-2.c new file mode 100644 index 00000000000..baffe7597ba --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117484-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ + +int a; +extern int d[]; +extern int b[]; +extern _Bool c[]; +extern char h[]; +int main() +{ + for (int i = 0; i < 1024; i += 4) + if (h[i] || c[i]) + { + a = d[i]; + b[i] = d[i - 3]; + } +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index d3efd53b00c..eebac1955de 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -2019,14 +2019,15 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, = STMT_VINFO_GROUPED_ACCESS (stmt_info) ? 
DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info; bool any_permute = false; - bool any_null = false; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) { int load_place; if (! load_info) { - load_place = j; - any_null = true; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + load_place = j; + else + load_place = 0; } else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) load_place = vect_get_place_in_interleaving_chain @@ -2037,11 +2038,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, any_permute |= load_place != j; load_permutation.quick_push (load_place); } - if (any_null) - { - gcc_assert (!any_permute); - load_permutation.release (); - } if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) { @@ -2096,6 +2092,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, followed by 'node' being the desired final permutation. */ if (unperm_load) { + gcc_assert + (!SLP_TREE_LOAD_PERMUTATION (unperm_load).exists ()); lane_permutation_t lperm; lperm.create (group_size); for (unsigned j = 0; j < load_permutation.length (); ++j) @@ -2116,6 +2114,10 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, } else { + if (!any_permute + && STMT_VINFO_GROUPED_ACCESS (stmt_info) + && group_size == DR_GROUP_SIZE (first_stmt_info)) + load_permutation.release (); SLP_TREE_LOAD_PERMUTATION (node) = load_permutation; return node; } @@ -2690,7 +2692,8 @@ out: tree op0; tree uniform_val = op0 = oprnd_info->ops[0]; for (j = 1; j < oprnd_info->ops.length (); ++j) - if (!operand_equal_p (uniform_val, oprnd_info->ops[j])) + if (oprnd_info->ops[j] + && !operand_equal_p (uniform_val, oprnd_info->ops[j])) { uniform_val = NULL_TREE; break; @@ -4525,6 +4528,7 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo, group_lanes, &max_nunits, matches, &limit, &tree_size, bst_map); + gcc_assert (!SLP_TREE_LOAD_PERMUTATION (l0).exists ()); if (ld_lanes_lanes != 0) { -- 2.43.0