When investigating some issues I ran into two SLP op gathering issues.
*You may not ask for SLP vector def ops in the wrong order*

Both vectorizable_comparison and vectorize_fold_left_reduction fall
into this trap.

I've also fixed a memleak uncovered by valgrind that I ran on the
above fix and a testcase.  I'm too lazy to try generate a testcase
that also fails with unpatched trunk but I think this qualifies
for this stage (at least the vectorizable_comparison one should
be easy to create).

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Richard.

>From b036b1ac618d9deb0efff89051f6ba1036d84ea3 Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguent...@suse.de>
Date: Mon, 21 Jan 2019 15:40:49 +0100
Subject: [PATCH] fix-slp-op-gatherings-and-memleak

        * tree-vect-loop.c (vect_analyze_loop_operations): Use
        auto_vec for cost vector to fix memleak.
        (vectorize_fold_left_reduction): Properly gather SLP defs.
        (vectorizable_comparison): Do not swap operands to properly
        gather SLP defs.

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 6ec8e72cd99..50d19427e54 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1470,8 +1470,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 
   DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
 
-  stmt_vector_for_cost cost_vec;
-  cost_vec.create (2);
+  auto_vec<stmt_info_for_cost> cost_vec;
 
   for (i = 0; i < nbbs; i++)
     {
@@ -1581,7 +1580,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
     } /* bbs */
 
   add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
-  cost_vec.release ();
 
   /* All operations in the loop are either irrelevant (deal with loop
      control, or dead), or only used outside the loop and can be moved
@@ -5742,8 +5740,14 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info,
   auto_vec<tree> vec_oprnds0;
   if (slp_node)
     {
-      vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
-                        slp_node);
+      auto_vec<vec<tree> > vec_defs (2);
+      auto_vec<tree> sops(2);
+      sops.quick_push (ops[0]);
+      sops.quick_push (ops[1]);
+      vect_get_slp_defs (sops, slp_node, &vec_defs);
+      vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
+      vec_defs[0].release ();
+      vec_defs[1].release ();
       group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
       scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
     }
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index c4f645e6d7d..b93097d6a6f 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9274,6 +9274,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
        BITOP2 (rhs1 BITOP1 rhs2) or
        rhs1 BITOP2 (BITOP1 rhs2)
      depending on bitop1 and bitop2 arity.  */
+  bool swap_p = false;
   if (VECTOR_BOOLEAN_TYPE_P (vectype))
     {
       if (code == GT_EXPR)
@@ -9290,15 +9291,13 @@ vectorizable_comparison (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
        {
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_AND_EXPR;
-         std::swap (rhs1, rhs2);
-         std::swap (dts[0], dts[1]);
+         swap_p = true;
        }
       else if (code == LE_EXPR)
        {
          bitop1 = BIT_NOT_EXPR;
          bitop2 = BIT_IOR_EXPR;
-         std::swap (rhs1, rhs2);
-         std::swap (dts[0], dts[1]);
+         swap_p = true;
        }
       else
        {
@@ -9365,6 +9364,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
              vect_get_slp_defs (ops, slp_node, &vec_defs);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
+             if (swap_p)
+               std::swap (vec_oprnds0, vec_oprnds1);
            }
          else
            {
@@ -9384,6 +9385,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, 
gimple_stmt_iterator *gsi,
 
       if (!slp_node)
        {
+         if (swap_p)
+           std::swap (vec_rhs1, vec_rhs2);
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);
        }

Reply via email to