https://gcc.gnu.org/g:b423891ad43d003a565e7b5c6ed648e446bd3c7c
commit b423891ad43d003a565e7b5c6ed648e446bd3c7c
Author: Richard Biener <rguent...@suse.de>
Date: Fri Feb 23 11:45:50 2024 +0100

    Do single-lane SLP discovery for reductions

    The following performs single-lane SLP discovery for reductions.
    This exposes a latent issue with reduction SLP in outer loop
    vectorization and makes gcc.dg/vect/vect-outer-4[fgkl].c FAIL
    execution.

            * tree-vect-slp.cc (vect_build_slp_tree_2): Only multi-lane
            discoveries are reduction chains and need special backedge
            treatment.
            (vect_analyze_slp): Fall back to single-lane SLP discovery
            for reductions. Make sure to try single-lane SLP reduction
            for all reductions as fallback.

Diff:
---
 gcc/tree-vect-slp.cc | 58 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ecc185aae885..f39cde3a8d50 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1918,7 +1918,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
               /* Reduction chain backedge defs are filled manually.
                  ??? Need a better way to identify a SLP reduction chain PHI.
                  Or a better overall way to SLP match those. */
-              if (all_same && def_type == vect_reduction_def)
+              if (stmts.length () > 1
+                  && all_same && def_type == vect_reduction_def)
                 skip_args[loop_latch_edge (loop)->dest_idx] = true;
             }
           else if (def_type != vect_internal_def)
@@ -3911,7 +3912,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
           }

       /* Find SLP sequences starting from groups of reductions. */
-      if (loop_vinfo->reductions.length () > 1)
+      if (loop_vinfo->reductions.length () > 0)
         {
           /* Collect reduction statements. */
           vec<stmt_vec_info> scalar_stmts;
@@ -3934,17 +3935,54 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                           && gimple_assign_rhs_code (g) != WIDEN_SUM_EXPR
                           && gimple_assign_rhs_code (g) != SAD_EXPR)))
                 scalar_stmts.quick_push (next_info);
+              else if (param_vect_single_lane_slp != 0)
+                {
+                  vec<stmt_vec_info> stmts;
+                  vec<stmt_vec_info> roots = vNULL;
+                  vec<tree> remain = vNULL;
+                  stmts.create (1);
+                  stmts.quick_push (next_info);
+                  bool res = vect_build_slp_instance (vinfo,
+                                                      slp_inst_kind_reduc_group,
+                                                      stmts, roots, remain,
+                                                      max_tree_size, &limit,
+                                                      bst_map, NULL);
+                  gcc_assert (res);
+                }
             }
-          if (scalar_stmts.length () > 1)
+          vec<stmt_vec_info> roots = vNULL;
+          vec<tree> remain = vNULL;
+          vec<stmt_vec_info> saved_stmts = vNULL;
+          if (param_vect_single_lane_slp != 0)
+            /* ??? scalar_stmts ownership and arg passing sucks. */
+            saved_stmts = scalar_stmts.copy ();
+          if ((scalar_stmts.length () <= 1
+               || !vect_build_slp_instance (loop_vinfo,
+                                            slp_inst_kind_reduc_group,
+                                            scalar_stmts, roots, remain,
+                                            max_tree_size, &limit, bst_map,
+                                            NULL))
+              && param_vect_single_lane_slp != 0)
             {
-              vec<stmt_vec_info> roots = vNULL;
-              vec<tree> remain = vNULL;
-              vect_build_slp_instance (loop_vinfo, slp_inst_kind_reduc_group,
-                                       scalar_stmts, roots, remain,
-                                       max_tree_size, &limit, bst_map, NULL);
+              if (scalar_stmts.length () <= 1)
+                scalar_stmts.release ();
+              /* Do SLP discovery for single-lane reductions. */
+              for (auto stmt_info : saved_stmts)
+                {
+                  vec<stmt_vec_info> stmts;
+                  vec<stmt_vec_info> roots = vNULL;
+                  vec<tree> remain = vNULL;
+                  stmts.create (1);
+                  stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
+                  bool res = vect_build_slp_instance (vinfo,
+                                                      slp_inst_kind_reduc_group,
+                                                      stmts, roots, remain,
+                                                      max_tree_size, &limit,
+                                                      bst_map, NULL);
+                  gcc_assert (res);
+                }
+              saved_stmts.release ();
             }
-          else
-            scalar_stmts.release ();
         }
     }