The following patch fixes an oversight in SLP discovery when handling permuted, non-grouped .MASK_LOAD accesses.
Bootstrapped and tested on x86_64-unknown-linux-gnu. This requires 1/2. PR tree-optimization/117050 * tree-vect-slp.cc (vect_build_slp_tree_2): Properly handle non-grouped masked loads when handling permutations. * gcc.dg/vect/pr117050.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr117050.c | 18 ++++++++++++++++++ gcc/tree-vect-slp.cc | 15 ++++++++------- 2 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr117050.c diff --git a/gcc/testsuite/gcc.dg/vect/pr117050.c b/gcc/testsuite/gcc.dg/vect/pr117050.c new file mode 100644 index 00000000000..7b12cbc9ef4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117050.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */ + +typedef struct { + char *data; +} song_sample_t; +typedef struct { + int right_ramp; + int left_ramp; +} song_voice_t; +song_sample_t *csf_stop_sample_smp, *csf_stop_sample_v_3; +song_voice_t *csf_stop_sample_v; +void csf_stop_sample() +{ + for (int i; i; i++, csf_stop_sample_v++) + if (csf_stop_sample_v_3 || csf_stop_sample_smp->data) + csf_stop_sample_v->left_ramp = csf_stop_sample_v->right_ramp = 0; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 796fc4ba577..dd8f1befa25 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1986,7 +1986,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, stmt_vec_info load_info; load_permutation.create (group_size); stmt_vec_info first_stmt_info - = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]); + = STMT_VINFO_GROUPED_ACCESS (stmt_info) + ? DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info; bool any_permute = false; bool any_null = false; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) @@ -2045,17 +2046,17 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, { /* Discover the whole unpermuted load. 
*/ vec<stmt_vec_info> stmts2; - stmts2.create (DR_GROUP_SIZE (first_stmt_info)); - stmts2.quick_grow_cleared (DR_GROUP_SIZE (first_stmt_info)); + unsigned dr_group_size = STMT_VINFO_GROUPED_ACCESS (stmt_info) + ? DR_GROUP_SIZE (first_stmt_info) : 1; + stmts2.create (dr_group_size); + stmts2.quick_grow_cleared (dr_group_size); unsigned i = 0; for (stmt_vec_info si = first_stmt_info; si; si = DR_GROUP_NEXT_ELEMENT (si)) stmts2[i++] = si; - bool *matches2 - = XALLOCAVEC (bool, DR_GROUP_SIZE (first_stmt_info)); + bool *matches2 = XALLOCAVEC (bool, dr_group_size); slp_tree unperm_load - = vect_build_slp_tree (vinfo, stmts2, - DR_GROUP_SIZE (first_stmt_info), + = vect_build_slp_tree (vinfo, stmts2, dr_group_size, &this_max_nunits, matches2, limit, &this_tree_size, bst_map); /* When we are able to do the full masked load emit that -- 2.43.0