The following fixes an oversight in SLP discovery when handling a
permuted non-grouped .MASK_LOAD.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  This requires
patch 1/2 of this series.

        PR tree-optimization/117050
        * tree-vect-slp.cc (vect_build_slp_tree_2): Properly handle
        non-grouped masked loads when handling permutations.

        * gcc.dg/vect/pr117050.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr117050.c | 18 ++++++++++++++++++
 gcc/tree-vect-slp.cc                 | 15 ++++++++-------
 2 files changed, 26 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr117050.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr117050.c 
b/gcc/testsuite/gcc.dg/vect/pr117050.c
new file mode 100644
index 00000000000..7b12cbc9ef4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr117050.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */
+
+typedef struct {
+  char *data;
+} song_sample_t;
+typedef struct {
+  int right_ramp;
+  int left_ramp;
+} song_voice_t;
+song_sample_t *csf_stop_sample_smp, *csf_stop_sample_v_3;
+song_voice_t *csf_stop_sample_v;
+void csf_stop_sample()
+{
+  for (int i; i; i++, csf_stop_sample_v++)
+    if (csf_stop_sample_v_3 || csf_stop_sample_smp->data)
+      csf_stop_sample_v->left_ramp = csf_stop_sample_v->right_ramp = 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 796fc4ba577..dd8f1befa25 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1986,7 +1986,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
          stmt_vec_info load_info;
          load_permutation.create (group_size);
          stmt_vec_info first_stmt_info
-           = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+           = STMT_VINFO_GROUPED_ACCESS (stmt_info)
+             ? DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info;
          bool any_permute = false;
          bool any_null = false;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
@@ -2045,17 +2046,17 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
                {
                  /* Discover the whole unpermuted load.  */
                  vec<stmt_vec_info> stmts2;
-                 stmts2.create (DR_GROUP_SIZE (first_stmt_info));
-                 stmts2.quick_grow_cleared (DR_GROUP_SIZE (first_stmt_info));
+                 unsigned dr_group_size = STMT_VINFO_GROUPED_ACCESS (stmt_info)
+                     ? DR_GROUP_SIZE (first_stmt_info) : 1;
+                 stmts2.create (dr_group_size);
+                 stmts2.quick_grow_cleared (dr_group_size);
                  unsigned i = 0;
                  for (stmt_vec_info si = first_stmt_info;
                       si; si = DR_GROUP_NEXT_ELEMENT (si))
                    stmts2[i++] = si;
-                 bool *matches2
-                   = XALLOCAVEC (bool, DR_GROUP_SIZE (first_stmt_info));
+                 bool *matches2 = XALLOCAVEC (bool, dr_group_size);
                  slp_tree unperm_load
-                   = vect_build_slp_tree (vinfo, stmts2,
-                                          DR_GROUP_SIZE (first_stmt_info),
+                   = vect_build_slp_tree (vinfo, stmts2, dr_group_size,
                                           &this_max_nunits, matches2, limit,
                                           &this_tree_size, bst_map);
                  /* When we are able to do the full masked load emit that
-- 
2.43.0

Reply via email to