When investigating some issues I ran into two SLP op gathering issues. *You may not ask for SLP vector def ops in the wrong order*
Both vectorizable_comparison and vectorize_fold_left_reduction fall into this trap. I've also fixed a memleak uncovered by valgrind that I ran on the above fix and a testcase. I'm too lazy to try generate a testcase that also fails with unpatched trunk but I think this qualifies for this stage (at least the vectorizable_comparison one should be easy to create). Bootstrap & regtest running on x86_64-unknown-linux-gnu. Richard. >From b036b1ac618d9deb0efff89051f6ba1036d84ea3 Mon Sep 17 00:00:00 2001 From: Richard Guenther <rguent...@suse.de> Date: Mon, 21 Jan 2019 15:40:49 +0100 Subject: [PATCH] fix-slp-op-gatherings-and-memleak * tree-vect-loop.c (vect_analyze_loop_operations): Use auto_vec for cost vector to fix memleak. (vectorize_fold_left_reduction): Properly gather SLP defs. (vectorizable_comparison): Do not swap operands to properly gather SLP defs. diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 6ec8e72cd99..50d19427e54 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1470,8 +1470,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) DUMP_VECT_SCOPE ("vect_analyze_loop_operations"); - stmt_vector_for_cost cost_vec; - cost_vec.create (2); + auto_vec<stmt_info_for_cost> cost_vec; for (i = 0; i < nbbs; i++) { @@ -1581,7 +1580,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) } /* bbs */ add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec); - cost_vec.release (); /* All operations in the loop are either irrelevant (deal with loop control, or dead), or only used outside the loop and can be moved @@ -5742,8 +5740,14 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, auto_vec<tree> vec_oprnds0; if (slp_node) { - vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL, - slp_node); + auto_vec<vec<tree> > vec_defs (2); + auto_vec<tree> sops(2); + sops.quick_push (ops[0]); + sops.quick_push (ops[1]); + vect_get_slp_defs (sops, slp_node, &vec_defs); + vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]); + vec_defs[0].release (); + vec_defs[1].release (); group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index c4f645e6d7d..b93097d6a6f 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -9274,6 +9274,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) depending on bitop1 and bitop2 arity. */ + bool swap_p = false; if (VECTOR_BOOLEAN_TYPE_P (vectype)) { if (code == GT_EXPR) @@ -9290,15 +9291,13 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { bitop1 = BIT_NOT_EXPR; bitop2 = BIT_AND_EXPR; - std::swap (rhs1, rhs2); - std::swap (dts[0], dts[1]); + swap_p = true; } else if (code == LE_EXPR) { bitop1 = BIT_NOT_EXPR; bitop2 = BIT_IOR_EXPR; - std::swap (rhs1, rhs2); - std::swap (dts[0], dts[1]); + swap_p = true; } else { @@ -9365,6 +9364,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, vect_get_slp_defs (ops, slp_node, &vec_defs); vec_oprnds1 = vec_defs.pop (); vec_oprnds0 = vec_defs.pop (); + if (swap_p) + std::swap (vec_oprnds0, vec_oprnds1); } else { @@ -9384,6 +9385,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (!slp_node) { + if (swap_p) + std::swap (vec_rhs1, vec_rhs2); vec_oprnds0.quick_push (vec_rhs1); vec_oprnds1.quick_push (vec_rhs2); }