On Wed, 28 Jun 2023, Tamar Christina wrote:
> Hi All,
>
> For early break vectorization we have to update niters analysis to record and
> analyze all exits of the loop, and so all conds.
>
> The niters of the loop is still determined by the main/natural exit of the
> loop
> as this is the O(n) bounds. For now we don't do much with the secondary
> conds,
> but their assumptions can be used to generate versioning checks later.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
I probably confused vec_init_exit_info in the previous patch - that said,
I'm missing a clear function that determines the natural exit of the
original (if-converted) scalar loop. As vec_init_exit_info seems
to (re-)compute that I'll comment on it here.
+ /* The main IV is to be determined by the block that's the first
reachable
+ block from the latch. We cannot rely on the order the loop analysis
+ returns and we don't have any SCEV analysis on the loop. */
+ auto_vec <edge> workset;
+ workset.safe_push (loop_latch_edge (loop));
+ hash_set <edge> visited;
+
+ while (!workset.is_empty ())
+ {
+ edge e = workset.pop ();
+ if (visited.contains (e))
+ continue;
+
+ bool found_p = false;
+ for (edge ex : e->src->succs)
+ {
+ if (exits.contains (ex))
+ {
+ found_p = true;
+ e = ex;
+ break;
+ }
+ }
+
+ if (found_p)
+ {
+ loop->vec_loop_iv = e;
+ for (edge ex : exits)
+ if (e != ex)
+ loop->vec_loop_alt_exits.safe_push (ex);
+ return;
+ }
+ else
+ {
+ for (edge ex : e->src->preds)
+ workset.safe_insert (0, ex);
+ }
+ visited.add (e);
+ }
So this greedily follows edges from the latch and takes the first
exit. Why's that better than simply choosing the first?
I'd have done
auto_vec<edge> exits = get_loop_exit_edges (loop);
for (e : exits)
{
if (vect_get_loop_niters (...))
{
if no assumptions use that edge, if assumptions continue
searching, maybe ther's an edge w/o assumptions
}
}
use (first) exit with assumptions
we probably want to know 'may_be_zero' as well and prefer an edge
without that. So eventually call number_of_iterations_exit_assumptions
directly and look for the best niter_desc and pass that to
vect_get_loop_niters (or re-do the work).
As said for "copying" the exit to the loop copies use the block mapping.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vect_get_loop_niters): Analyze all exits and return
> all gconds.
> (vect_analyze_loop_form): Update code checking for conds.
> (vect_create_loop_vinfo): Handle having multiple conds.
> (vect_analyze_loop): Release extra loop conds structures.
> * tree-vectorizer.h (LOOP_VINFO_LOOP_CONDS,
> LOOP_VINFO_LOOP_IV_COND): New.
> (struct vect_loop_form_info): Add conds, loop_iv_cond.
>
> --- inline copy of patch --
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index
> 55e69a7ca0b24e0872477141db6f74dbf90b7981..9065811b3b9c2a550baf44768603172b9e26b94b
> 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -849,80 +849,106 @@ vect_fixup_scalar_cycles_with_patterns (loop_vec_info
> loop_vinfo)
> in NUMBER_OF_ITERATIONSM1. Place the condition under which the
> niter information holds in ASSUMPTIONS.
>
> - Return the loop exit condition. */
> + Return the loop exit conditions. */
>
>
> -static gcond *
> +static vec<gcond *>
> vect_get_loop_niters (class loop *loop, tree *assumptions,
> tree *number_of_iterations, tree *number_of_iterationsm1)
> {
> - edge exit = single_exit (loop);
> + auto_vec<edge> exits = get_loop_exit_edges (loop);
> + vec<gcond *> conds;
> + conds.create (exits.length ());
> class tree_niter_desc niter_desc;
> tree niter_assumptions, niter, may_be_zero;
> - gcond *cond = get_loop_exit_condition (loop);
>
> *assumptions = boolean_true_node;
> *number_of_iterationsm1 = chrec_dont_know;
> *number_of_iterations = chrec_dont_know;
> +
> DUMP_VECT_SCOPE ("get_loop_niters");
>
> - if (!exit)
> - return cond;
> + if (exits.is_empty ())
> + return conds;
>
> - may_be_zero = NULL_TREE;
> - if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
> - || chrec_contains_undetermined (niter_desc.niter))
> - return cond;
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "Loop has %d exits.\n",
> + exits.length ());
>
> - niter_assumptions = niter_desc.assumptions;
> - may_be_zero = niter_desc.may_be_zero;
> - niter = niter_desc.niter;
> + edge exit;
> + unsigned int i;
> + FOR_EACH_VEC_ELT (exits, i, exit)
> + {
> + gcond *cond = get_edge_condition (exit);
> + if (cond)
> + conds.safe_push (cond);
>
> - if (may_be_zero && integer_zerop (may_be_zero))
> - may_be_zero = NULL_TREE;
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "Analyzing exit %d...\n", i);
>
> - if (may_be_zero)
> - {
> - if (COMPARISON_CLASS_P (may_be_zero))
> + may_be_zero = NULL_TREE;
> + if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc,
> NULL)
> + || chrec_contains_undetermined (niter_desc.niter))
> + continue;
> +
> + niter_assumptions = niter_desc.assumptions;
> + may_be_zero = niter_desc.may_be_zero;
> + niter = niter_desc.niter;
> +
> + if (may_be_zero && integer_zerop (may_be_zero))
> + may_be_zero = NULL_TREE;
> +
> + if (may_be_zero)
> {
> - /* Try to combine may_be_zero with assumptions, this can simplify
> - computation of niter expression. */
> - if (niter_assumptions && !integer_nonzerop (niter_assumptions))
> - niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
> - niter_assumptions,
> - fold_build1 (TRUTH_NOT_EXPR,
> - boolean_type_node,
> - may_be_zero));
> + if (COMPARISON_CLASS_P (may_be_zero))
> + {
> + /* Try to combine may_be_zero with assumptions, this can simplify
> + computation of niter expression. */
> + if (niter_assumptions && !integer_nonzerop (niter_assumptions))
> + niter_assumptions = fold_build2 (TRUTH_AND_EXPR,
> boolean_type_node,
> + niter_assumptions,
> + fold_build1 (TRUTH_NOT_EXPR,
> + boolean_type_node,
> + may_be_zero));
> + else
> + niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
> + build_int_cst (TREE_TYPE (niter), 0),
> + rewrite_to_non_trapping_overflow (niter));
> +
> + may_be_zero = NULL_TREE;
> + }
> + else if (integer_nonzerop (may_be_zero) && exit == loop->vec_loop_iv)
> + {
> + *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
> + *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
> + continue;
> + }
> else
> - niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
> - build_int_cst (TREE_TYPE (niter), 0),
> - rewrite_to_non_trapping_overflow (niter));
> + continue;
> + }
>
> - may_be_zero = NULL_TREE;
> - }
> - else if (integer_nonzerop (may_be_zero))
> + /* Loop assumptions are based off the normal exit. */
> + if (exit == loop->vec_loop_iv)
> {
> - *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
> - *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
> - return cond;
> + *assumptions = niter_assumptions;
> + *number_of_iterationsm1 = niter;
> +
> + /* We want the number of loop header executions which is the number
> + of latch executions plus one.
> + ??? For UINT_MAX latch executions this number overflows to zero
> + for loops like do { n++; } while (n != 0); */
> + if (niter && !chrec_contains_undetermined (niter))
> + niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter),
> + unshare_expr (niter),
> + build_int_cst (TREE_TYPE (niter), 1));
> + *number_of_iterations = niter;
> }
> - else
> - return cond;
> }
>
> - *assumptions = niter_assumptions;
> - *number_of_iterationsm1 = niter;
> -
> - /* We want the number of loop header executions which is the number
> - of latch executions plus one.
> - ??? For UINT_MAX latch executions this number overflows to zero
> - for loops like do { n++; } while (n != 0); */
> - if (niter && !chrec_contains_undetermined (niter))
> - niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter), unshare_expr (niter),
> - build_int_cst (TREE_TYPE (niter), 1));
> - *number_of_iterations = niter;
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_NOTE, vect_location, "All loop exits successfully
> analyzed.\n");
>
> - return cond;
> + return conds;
> }
>
> /* Function bb_in_loop_p
> @@ -1768,15 +1794,26 @@ vect_analyze_loop_form (class loop *loop,
> vect_loop_form_info *info)
> "not vectorized:"
> " abnormal loop exit edge.\n");
>
> - info->loop_cond
> + info->conds
> = vect_get_loop_niters (loop, &info->assumptions,
> &info->number_of_iterations,
> &info->number_of_iterationsm1);
> - if (!info->loop_cond)
> +
> + if (info->conds.is_empty ())
> return opt_result::failure_at
> (vect_location,
> "not vectorized: complicated exit condition.\n");
>
> + /* Determine what the primary and alternate exit conds are. */
> + info->alt_loop_conds.create (info->conds.length () - 1);
> + for (gcond *cond : info->conds)
> + {
> + if (loop->vec_loop_iv->src != gimple_bb (cond))
> + info->alt_loop_conds.quick_push (cond);
> + else
> + info->loop_cond = cond;
> + }
Do you really need those explicitely? ->conds and ->alt_loop_conds
looks redundant at least.
> +
> if (integer_zerop (info->assumptions)
> || !info->number_of_iterations
> || chrec_contains_undetermined (info->number_of_iterations))
> @@ -1821,8 +1858,14 @@ vect_create_loop_vinfo (class loop *loop,
> vec_info_shared *shared,
> if (!integer_onep (info->assumptions) && !main_loop_info)
> LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions;
>
> - stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (info->loop_cond);
> - STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
> + for (gcond *cond : info->alt_loop_conds)
> + {
> + stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond);
> + STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
> + }
> + LOOP_VINFO_LOOP_CONDS (loop_vinfo).safe_splice (info->alt_loop_conds);
> + LOOP_VINFO_LOOP_IV_COND (loop_vinfo) = info->loop_cond;
> +
> if (info->inner_loop_cond)
> {
> stmt_vec_info inner_loop_cond_info
> @@ -3520,6 +3563,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared
> *shared)
> "***** Choosing vector mode %s\n",
> GET_MODE_NAME (first_loop_vinfo->vector_mode));
>
> + loop_form_info.conds.release ();
> + loop_form_info.alt_loop_conds.release ();
> +
> /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> enabled, SIMDUID is not set, it is the innermost loop and we have
> either already found the loop's SIMDLEN or there was no SIMDLEN to
> @@ -3631,6 +3677,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared
> *shared)
> (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
> }
>
> + loop_form_info.conds.release ();
> + loop_form_info.alt_loop_conds.release ();
> +
> return first_loop_vinfo;
> }
>
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index
> bd5eceb5da7a45ef036cd14609ebe091799320bf..1cc003c12e2447eca878f56cb019236f56e96f85
> 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -876,6 +876,12 @@ public:
> we need to peel off iterations at the end to form an epilogue loop. */
> bool peeling_for_niter;
>
> + /* List of loop additional IV conditionals found in the loop. */
drop "IV"
> + auto_vec<gcond *> conds;
> +
> + /* Main loop IV cond. */
> + gcond* loop_iv_cond;
> +
I guess I have to look at the followup patches to see how often we
have to access loop_iv_cond/conds.
> /* True if there are no loop carried data dependencies in the loop.
> If loop->safelen <= 1, then this is always true, either the loop
> didn't have any loop carried data dependencies, or the loop is being
> @@ -966,6 +972,8 @@ public:
> #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
> #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
> #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
> +#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
> +#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
> #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
> #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop
> #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
> @@ -2353,7 +2361,9 @@ struct vect_loop_form_info
> tree number_of_iterations;
> tree number_of_iterationsm1;
> tree assumptions;
> + vec<gcond *> conds;
> gcond *loop_cond;
> + vec<gcond *> alt_loop_conds;
> gcond *inner_loop_cond;
> };
> extern opt_result vect_analyze_loop_form (class loop *, vect_loop_form_info
> *);
>
>
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)