https://gcc.gnu.org/g:4b47acfe2b626d1276e229a0cf165e934813df6c
commit r16-2158-g4b47acfe2b626d1276e229a0cf165e934813df6c Author: Richard Biener <rguent...@suse.de> Date: Wed Jul 9 12:53:45 2025 +0200 Remove non-SLP vectorization factor determining The following removes the VF determining step from non-SLP stmts. For now we keep setting STMT_VINFO_VECTYPE for all stmts, there are too many places to fix, including some more complicated ones, so this is defered for a followup. Along this removes vect_update_vf_for_slp, merging the check for present hybrid SLP stmts to vect_detect_hybrid_slp and fail analysis early. This also removes to essentially duplicate this check in the stmt walk of vect_analyze_loop_operations. Getting rid of that, and performing some other checks earlier is also defered to a followup. * tree-vect-loop.cc (vect_determine_vf_for_stmt_1): Rename to ... (vect_determine_vectype_for_stmt_1): ... this and only set STMT_VINFO_VECTYPE. Fail for single-element vector types. (vect_determine_vf_for_stmt): Rename to ... (vect_determine_vectype_for_stmt): ... this and only set STMT_VINFO_VECTYPE. Fail for single-element vector types. (vect_determine_vectorization_factor): Rename to ... (vect_set_stmts_vectype): ... this and only set STMT_VINFO_VECTYPE. (vect_update_vf_for_slp): Remove. (vect_analyze_loop_operations): Remove walk over stmts. (vect_analyze_loop_2): Call vect_set_stmts_vectype instead of vect_determine_vectorization_factor. Set vectorization factor from LOOP_VINFO_SLP_UNROLLING_FACTOR. Fail if vect_detect_hybrid_slp detects hybrid stmts or when vect_make_slp_decision finds nothing to SLP. * tree-vect-slp.cc (vect_detect_hybrid_slp): Move check whether we have any hybrid stmts here from vect_update_vf_for_slp * tree-vect-stmts.cc (vect_analyze_stmt): Remove loop over stmts. * tree-vectorizer.h (vect_detect_hybrid_slp): Update. Diff: --- gcc/tree-vect-loop.cc | 220 ++++++++++--------------------------------------- gcc/tree-vect-slp.cc | 48 ++++++++++- gcc/tree-vect-stmts.cc | 12 ++- gcc/tree-vectorizer.h | 2 +- 4 files changed, 100 insertions(+), 182 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 42e00159ff82..98ac528e3a97 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -168,9 +168,8 @@ static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, may already be set for general statements (not just data refs). */ static opt_result -vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, - bool vectype_maybe_set_p, - poly_uint64 *vf) +vect_determine_vectype_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, + bool vectype_maybe_set_p) { gimple *stmt = stmt_info->stmt; @@ -192,6 +191,12 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, if (stmt_vectype) { + if (known_le (TYPE_VECTOR_SUBPARTS (stmt_vectype), 1U)) + return opt_result::failure_at (STMT_VINFO_STMT (stmt_info), + "not vectorized: unsupported " + "data-type in %G", + STMT_VINFO_STMT (stmt_info)); + if (STMT_VINFO_VECTYPE (stmt_info)) /* The only case when a vectype had been already set is for stmts that contain a data ref, or for "pattern-stmts" (stmts generated @@ -203,9 +208,6 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; } - if (nunits_vectype) - vect_update_max_nunits (vf, nunits_vectype); - return opt_result::success (); } @@ -215,13 +217,12 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, or false if something prevented vectorization. */ static opt_result -vect_determine_vf_for_stmt (vec_info *vinfo, - stmt_vec_info stmt_info, poly_uint64 *vf) +vect_determine_vectype_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", stmt_info->stmt); - opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, false, vf); + opt_result res = vect_determine_vectype_for_stmt_1 (vinfo, stmt_info, false); if (!res) return res; @@ -240,7 +241,7 @@ vect_determine_vf_for_stmt (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "==> examining pattern def stmt: %G", def_stmt_info->stmt); - res = vect_determine_vf_for_stmt_1 (vinfo, def_stmt_info, true, vf); + res = vect_determine_vectype_for_stmt_1 (vinfo, def_stmt_info, true); if (!res) return res; } @@ -249,7 +250,7 @@ vect_determine_vf_for_stmt (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "==> examining pattern statement: %G", stmt_info->stmt); - res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, true, vf); + res = vect_determine_vectype_for_stmt_1 (vinfo, stmt_info, true); if (!res) return res; } @@ -257,45 +258,23 @@ vect_determine_vf_for_stmt (vec_info *vinfo, return opt_result::success (); } -/* Function vect_determine_vectorization_factor - - Determine the vectorization factor (VF). VF is the number of data elements - that are operated upon in parallel in a single iteration of the vectorized - loop. For example, when vectorizing a loop that operates on 4byte elements, - on a target with vector size (VS) 16byte, the VF is set to 4, since 4 - elements can fit in a single vector register. +/* Function vect_set_stmts_vectype - We currently support vectorization of loops in which all types operated upon - are of the same size. Therefore this function currently sets VF according to - the size of the types operated upon, and fails if there are multiple sizes - in the loop. - - VF is also the factor by which the loop iterations are strip-mined, e.g.: - original loop: - for (i=0; i<N; i++){ - a[i] = b[i] + c[i]; - } - - vectorized loop: - for (i=0; i<N; i+=VF){ - a[i:VF] = b[i:VF] + c[i:VF]; - } -*/ + Set STMT_VINFO_VECTYPE of all stmts. */ static opt_result -vect_determine_vectorization_factor (loop_vec_info loop_vinfo) +vect_set_stmts_vectype (loop_vec_info loop_vinfo) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); unsigned nbbs = loop->num_nodes; - poly_uint64 vectorization_factor = 1; tree scalar_type = NULL_TREE; gphi *phi; tree vectype; stmt_vec_info stmt_info; unsigned i; - DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); + DUMP_VECT_SCOPE ("vect_set_stmts_vectype"); for (i = 0; i < nbbs; i++) { @@ -324,7 +303,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) scalar_type); vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); - if (!vectype) + if (!vectype + || known_le (TYPE_VECTOR_SUBPARTS (vectype), 1U)) return opt_result::failure_at (phi, "not vectorized: unsupported " "data-type %T\n", @@ -334,15 +314,6 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); - dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); - dump_printf (MSG_NOTE, "\n"); - } - - vect_update_max_nunits (&vectorization_factor, vectype); } } @@ -353,25 +324,12 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) continue; stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); opt_result res - = vect_determine_vf_for_stmt (loop_vinfo, - stmt_info, &vectorization_factor); + = vect_determine_vectype_for_stmt (loop_vinfo, stmt_info); if (!res) return res; } } - /* TODO: Analyze cost. Decide if worth while to vectorize. */ - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); - dump_dec (MSG_NOTE, vectorization_factor); - dump_printf (MSG_NOTE, "\n"); - } - - if (known_le (vectorization_factor, 1U)) - return opt_result::failure_at (vect_location, - "not vectorized: unsupported data-type\n"); - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; return opt_result::success (); } @@ -2002,89 +1960,6 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared, -/* Scan the loop stmts and dependent on whether there are any (non-)SLP - statements update the vectorization factor. */ - -static void -vect_update_vf_for_slp (loop_vec_info loop_vinfo) -{ - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); - int nbbs = loop->num_nodes; - poly_uint64 vectorization_factor; - int i; - - DUMP_VECT_SCOPE ("vect_update_vf_for_slp"); - - vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - gcc_assert (known_ne (vectorization_factor, 0U)); - - /* If all the stmts in the loop can be SLPed, we perform only SLP, and - vectorization factor of the loop is the unrolling factor required by - the SLP instances. If that unrolling factor is 1, we say, that we - perform pure SLP on loop - cross iteration parallelism is not - exploited. */ - bool only_slp_in_loop = true; - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; - for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); - gsi_next (&si)) - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); - if (!stmt_info) - continue; - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; - } - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - if (is_gimple_debug (gsi_stmt (si))) - continue; - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - stmt_info = vect_stmt_to_vectorize (stmt_info); - if ((STMT_VINFO_RELEVANT_P (stmt_info) - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - && !PURE_SLP_STMT (stmt_info)) - /* STMT needs both SLP and loop-based vectorization. */ - only_slp_in_loop = false; - } - } - - if (only_slp_in_loop) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Loop contains only SLP stmts\n"); - vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); - } - else - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Loop contains SLP and non-SLP stmts\n"); - /* Both the vectorization factor and unroll factor have the form - GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, - so they must have a common multiple. */ - vectorization_factor - = force_common_multiple (vectorization_factor, - LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); - } - - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, - "Updating vectorization factor to "); - dump_dec (MSG_NOTE, vectorization_factor); - dump_printf (MSG_NOTE, ".\n"); - } -} - /* Return true if STMT_INFO describes a double reduction phi and if the other phi in the reduction is also relevant for vectorization. This rejects cases such as: @@ -2207,24 +2082,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) "supported: %G", static_cast <gimple *> (phi)); } - - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { - gimple *stmt = gsi_stmt (si); - if (!gimple_clobber_p (stmt) - && !is_gimple_debug (stmt)) - { - bool need_to_vectorize = false; - opt_result res - = vect_analyze_stmt (loop_vinfo, - loop_vinfo->lookup_stmt (stmt), - &need_to_vectorize, - NULL, NULL, NULL); - if (!res) - return res; - } - } } /* bbs */ return opt_result::success (); @@ -2839,19 +2696,18 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, return opt_result::failure_at (vect_location, "bad data dependence.\n"); LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; - ok = vect_determine_vectorization_factor (loop_vinfo); + ok = vect_set_stmts_vectype (loop_vinfo); if (!ok) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't determine vectorization factor.\n"); + "cannot determine vector types.\n"); return ok; } /* Compute the scalar iteration cost. */ vect_compute_single_scalar_iteration_cost (loop_vinfo); - poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); bool saved_can_use_partial_vectors_p = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo); @@ -2867,21 +2723,29 @@ start_over: return ok; /* If there are any SLP instances mark them as pure_slp. */ - if (vect_make_slp_decision (loop_vinfo)) - { - /* Find stmts that need to be both vectorized and SLPed. */ - vect_detect_hybrid_slp (loop_vinfo); + if (!vect_make_slp_decision (loop_vinfo)) + return opt_result::failure_at (vect_location, "no stmts to vectorize.\n"); - /* Update the vectorization factor based on the SLP decision. */ - vect_update_vf_for_slp (loop_vinfo); + /* Find stmts that need to be both vectorized and SLPed. */ + if (!vect_detect_hybrid_slp (loop_vinfo)) + return opt_result::failure_at (vect_location, "needs non-SLP handling\n"); - /* Optimize the SLP graph with the vectorization factor fixed. */ - vect_optimize_slp (loop_vinfo); - - /* Gather the loads reachable from the SLP graph entries. */ - vect_gather_slp_loads (loop_vinfo); + /* Determine the vectorization factor from the SLP decision. */ + LOOP_VINFO_VECT_FACTOR (loop_vinfo) + = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); + dump_dec (MSG_NOTE, LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + dump_printf (MSG_NOTE, "\n"); } + /* Optimize the SLP graph with the vectorization factor fixed. */ + vect_optimize_slp (loop_vinfo); + + /* Gather the loads reachable from the SLP graph entries. */ + vect_gather_slp_loads (loop_vinfo); + /* We don't expect to have to roll back to anything other than an empty set of rgroups. */ gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); @@ -3272,8 +3136,8 @@ again: dump_printf_loc (MSG_NOTE, vect_location, "re-trying with single-lane SLP\n"); - /* Restore vectorization factor as it were without SLP. */ - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; + /* Reset the vectorization factor. */ + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = 0; /* Free the SLP instances. */ FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) vect_free_slp_instance (instance); diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 7a828cadbd53..f97a3635cff1 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -7691,7 +7691,7 @@ maybe_push_to_hybrid_worklist (vec_info *vinfo, /* Find stmts that must be both vectorized and SLPed. */ -void +bool vect_detect_hybrid_slp (loop_vec_info loop_vinfo) { DUMP_VECT_SCOPE ("vect_detect_hybrid_slp"); @@ -7772,6 +7772,52 @@ vect_detect_hybrid_slp (loop_vec_info loop_vinfo) vect_detect_hybrid_slp (&gs_info.offset, &dummy, &wi); } } + + /* Determine if all the stmts in the loop can be SLPed. */ + for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; i++) + { + basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; + for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); + gsi_next (&si)) + { + stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); + if (!stmt_info) + continue; + if ((STMT_VINFO_RELEVANT_P (stmt_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + && !PURE_SLP_STMT (stmt_info)) + { + /* STMT needs both SLP and loop-based vectorization. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Loop contains SLP and non-SLP stmts\n"); + return false; + } + } + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); + gsi_next (&si)) + { + if (is_gimple_debug (gsi_stmt (si))) + continue; + stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); + stmt_info = vect_stmt_to_vectorize (stmt_info); + if ((STMT_VINFO_RELEVANT_P (stmt_info) + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + && !PURE_SLP_STMT (stmt_info)) + { + /* STMT needs both SLP and loop-based vectorization. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Loop contains SLP and non-SLP stmts\n"); + return false; + } + } + } + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Loop contains only SLP stmts\n"); + return true; } diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index da201274c786..081dd653fd46 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -13392,8 +13392,16 @@ vect_analyze_stmt (vec_info *vinfo, /* When we arrive here with a non-SLP statement and we are supposed to use SLP for everything fail vectorization. */ if (!node) - return opt_result::failure_at (stmt_info->stmt, - "needs non-SLP handling\n"); + { + /* We leave is_simple_and_all_uses_invariant but live stmts + around with no need to vectorize them. */ + if (!STMT_VINFO_RELEVANT_P (stmt_info) + && STMT_VINFO_LIVE_P (stmt_info) + && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) + return opt_result::success (); + return opt_result::failure_at (stmt_info->stmt, + "needs non-SLP handling\n"); + } ok = true; if (!bb_vinfo diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index ba06c5d2e664..b050286c84cc 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2689,7 +2689,7 @@ extern bool vect_slp_analyze_operations (vec_info *); extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &); extern opt_result vect_analyze_slp (vec_info *, unsigned, bool); extern bool vect_make_slp_decision (loop_vec_info); -extern void vect_detect_hybrid_slp (loop_vec_info); +extern bool vect_detect_hybrid_slp (loop_vec_info); extern void vect_optimize_slp (vec_info *); extern void vect_gather_slp_loads (vec_info *); extern tree vect_get_slp_scalar_def (slp_tree, unsigned);