On Tue, 11 Jul 2023, Jan Hubicka wrote:
> Hi,
> this patch improves profile update in loop-ch to handle situation where
> duplicated header
> has loop invariant test. In this case we konw that all count of the exit
> edge belongs to
> the duplicated loop header edge and can update probabilities accordingly.
> Since we also do all the work to track this information from analysis to
> duplicaiton
> I also added code to turn those conditionals to constants so we do not need
> later
> jump threading pass to clean up.
>
> This made me to work out that the propagatoin was buggy in few aspects
> 1) it handled every PHI as PHI in header and incorrectly assigned some PHIs
> to be IV-like when they are not
> 2) it did not check for novops calls that are not required to return same
> value on every invocation.
> 3) I also added check for asm statement since those are not necessarily
> reproducible either.
>
> I would like to do more changes, but tried to prevent this patch from
> snowballing. The analysis of what statements will remain after duplication
> can
> be improved. I think we should use ranger query for other than first basic
> block, too and possibly drop the IV heuristics then. Also it seems that a lot
> of this logic is pretty much same to analysis in peeling pass, so unifying
> this
> would be nice.
Indeed.
> I also think I should move the profile update out of
> gimple_duplicate_sese_region (it is now very specific to ch) and rename it,
> since those regions are singe entry multiple exit.
Please. Maybe move it to tree-ssa-loop-ch.cc as well.
> Bootstrapped/regtsted x86_64-linux, OK?
OK, thanks for improving this.
Richard.
> Honza
>
> gcc/ChangeLog:
>
> * tree-cfg.cc (gimple_duplicate_sese_region): Add ORIG_ELIMINATED_EDGES
> parameter and rewrite profile updating code to handle edges elimination.
> * tree-cfg.h (gimple_duplicate_sese_region): Update prototpe.
> * tree-ssa-loop-ch.cc (loop_invariant_op_p): New function.
> (loop_iv_derived_p): New function.
> (should_duplicate_loop_header_p): Track invariant exit edges; fix
> handling
> of PHIs and propagation of IV derived variables.
> (ch_base::copy_headers): Pass around the invariant edges hash set.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/loop-ch-profile-1.c: Remove xfail.
>
> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> index 4989906706c..3879fb7c4c1 100644
> --- a/gcc/tree-cfg.cc
> +++ b/gcc/tree-cfg.cc
> @@ -6661,14 +6661,16 @@ add_phi_args_after_copy (basic_block *region_copy,
> unsigned n_region,
> true otherwise.
>
> ELIMINATED_EDGE is an edge that is known to be removed in the dupicated
> - region. */
> + region. ORIG_ELIMINATED_EDGES, if non-NULL is set of edges known to be
> + removed from the original region. */
>
> bool
> gimple_duplicate_sese_region (edge entry, edge exit,
> basic_block *region, unsigned n_region,
> basic_block *region_copy,
> bool update_dominance,
> - edge eliminated_edge)
> + edge eliminated_edge,
> + hash_set <edge> *orig_eliminated_edges)
> {
> unsigned i;
> bool free_region_copy = false, copying_header = false;
> @@ -6747,7 +6749,8 @@ gimple_duplicate_sese_region (edge entry, edge exit,
> split_edge_bb_loc (entry), update_dominance);
> if (total_count.initialized_p () && entry_count.initialized_p ())
> {
> - if (!eliminated_edge)
> + if (!eliminated_edge
> + && (!orig_eliminated_edges || orig_eliminated_edges->is_empty ()))
> {
> scale_bbs_frequencies_profile_count (region, n_region,
> total_count - entry_count,
> @@ -6765,7 +6768,7 @@ gimple_duplicate_sese_region (edge entry, edge exit,
> if (cond1) <- this condition will become false
> and we update probabilities
> goto loop_exit;
> - if (cond2)
> + if (cond2) <- this condition is loop invariant
> goto loop_exit;
> goto loop_header <- this will be redirected to loop.
> // region_copy_end
> @@ -6776,6 +6779,7 @@ gimple_duplicate_sese_region (edge entry, edge exit,
> if (cond1) <- we need to update probabbility here
> goto loop_exit;
> if (cond2) <- and determine scaling factor here.
> + moreover cond2 is now always true
> goto loop_exit;
> else
> goto loop;
> @@ -6785,53 +6789,84 @@ gimple_duplicate_sese_region (edge entry, edge exit,
> but only consumer so far is tree-ssa-loop-ch and it uses only this
> to handle the common case of peeling headers which have
> conditionals known to be always true upon entry. */
> - gcc_assert (eliminated_edge->src == region[0]
> - && EDGE_COUNT (region[0]->succs) == 2
> - && copying_header);
> -
> - edge e, e_copy, eliminated_edge_copy;
> - if (EDGE_SUCC (region[0], 0) == eliminated_edge)
> - {
> - e = EDGE_SUCC (region[0], 1);
> - e_copy = EDGE_SUCC (region_copy[0], 1);
> - eliminated_edge_copy = EDGE_SUCC (region_copy[0], 0);
> - }
> - else
> + gcc_checking_assert (copying_header);
> + for (unsigned int i = 0; i < n_region; i++)
> {
> - e = EDGE_SUCC (region[0], 0);
> - e_copy = EDGE_SUCC (region_copy[0], 0);
> - eliminated_edge_copy = EDGE_SUCC (region_copy[0], 1);
> - }
> - gcc_checking_assert (e != e_copy
> - && eliminated_edge_copy != eliminated_edge
> - && eliminated_edge_copy->dest
> - == eliminated_edge->dest);
> -
> + edge exit_e, exit_e_copy, e, e_copy;
> + if (EDGE_COUNT (region[i]->succs) == 1)
> + {
> + region_copy[i]->count = entry_count;
> + region[i]->count -= entry_count;
> + continue;
> + }
>
> - /* Handle first basic block in duplicated region as in the
> - non-eliminating case. */
> - scale_bbs_frequencies_profile_count (region_copy, n_region,
> - entry_count, total_count);
> - /* Now update redirecting eliminated edge to the other edge.
> - Actual CFG update is done by caller. */
> - e_copy->probability = profile_probability::always ();
> - eliminated_edge_copy->probability = profile_probability::never ();
> - /* Header copying is a special case of jump threading, so use
> - common code to update loop body exit condition. */
> - update_bb_profile_for_threading (region[0], e_copy->count (), e);
> - /* If we duplicated more conditionals first scale the profile of
> - rest of the preheader. Then work out the probability of
> - entering the loop and scale rest of the loop. */
> - if (n_region > 1)
> - {
> - scale_bbs_frequencies_profile_count (region_copy + 1,
> - n_region - 1,
> - e_copy->count (),
> - region_copy[1]->count);
> - scale_bbs_frequencies_profile_count (region + 1, n_region - 1,
> - e->count (),
> - region[1]->count);
> + gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
> + if (loop_exit_edge_p (region[0]->loop_father,
> + EDGE_SUCC (region[i], 0)))
> + {
> + exit_e = EDGE_SUCC (region[i], 0);
> + exit_e_copy = EDGE_SUCC (region_copy[i], 0);
> + e = EDGE_SUCC (region[i], 1);
> + e_copy = EDGE_SUCC (region_copy[i], 1);
> + }
> + else
> + {
> + exit_e = EDGE_SUCC (region[i], 1);
> + exit_e_copy = EDGE_SUCC (region_copy[i], 1);
> + e = EDGE_SUCC (region[i], 0);
> + e_copy = EDGE_SUCC (region_copy[i], 0);
> + }
> + gcc_assert (i == n_region - 1
> + || (e->dest == region[i + 1]
> + && e_copy->dest == region_copy[i + 1]));
> + region_copy[i]->count = entry_count;
> + profile_count exit_e_count = exit_e->count ();
> + if (eliminated_edge == exit_e)
> + {
> + /* Update profile and the conditional.
> + CFG update is done by caller. */
> + e_copy->probability = profile_probability::always ();
> + exit_e_copy->probability = profile_probability::never ();
> + gcond *cond_stmt
> + = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
> + if (e_copy->flags & EDGE_TRUE_VALUE)
> + gimple_cond_make_true (cond_stmt);
> + else
> + gimple_cond_make_false (cond_stmt);
> + update_stmt (cond_stmt);
> + /* Header copying is a special case of jump threading, so use
> + common code to update loop body exit condition. */
> + update_bb_profile_for_threading (region[i], entry_count, e);
> + eliminated_edge = NULL;
> + }
> + else
> + region[i]->count -= region_copy[i]->count;
> + if (orig_eliminated_edges->contains (exit_e))
> + {
> + orig_eliminated_edges->remove (exit_e);
> + /* All exits will happen in exit_e_copy which is out of the
> + loop, so increase probability accordingly.
> + If the edge is eliminated_edge we already corrected
> + profile above. */
> + if (entry_count.nonzero_p () && eliminated_edge != exit_e)
> + set_edge_probability_and_rescale_others
> + (exit_e_copy, exit_e_count.probability_in
> + (entry_count));
> + /* Eliminate in-loop conditional. */
> + e->probability = profile_probability::always ();
> + exit_e->probability = profile_probability::never ();
> + gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
> + if (e->flags & EDGE_TRUE_VALUE)
> + gimple_cond_make_true (cond_stmt);
> + else
> + gimple_cond_make_false (cond_stmt);
> + update_stmt (cond_stmt);
> + }
> + entry_count = e_copy->count ();
> }
> + /* Be sure that we seen all edges we are supposed to update. */
> + gcc_checking_assert (!eliminated_edge
> + && orig_eliminated_edges->is_empty ());
> }
> }
>
> diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
> index b9ccd58c3db..a7cc37f3b97 100644
> --- a/gcc/tree-cfg.h
> +++ b/gcc/tree-cfg.h
> @@ -70,7 +70,8 @@ extern void add_phi_args_after_copy_bb (basic_block);
> extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
> extern basic_block split_edge_bb_loc (edge);
> extern bool gimple_duplicate_sese_region (edge, edge, basic_block *,
> unsigned,
> - basic_block *, bool, edge);
> + basic_block *, bool, edge,
> + hash_set <edge> *);
> extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
> basic_block *);
> extern void gather_blocks_in_sese_region (basic_block entry, basic_block
> exit,
> diff --git a/gcc/tree-ssa-loop-ch.cc b/gcc/tree-ssa-loop-ch.cc
> index 72792cec21f..cae6266be46 100644
> --- a/gcc/tree-ssa-loop-ch.cc
> +++ b/gcc/tree-ssa-loop-ch.cc
> @@ -97,13 +97,42 @@ static_loop_exit (class loop *l, gimple_ranger *ranger)
> return r == desired_static_range ? ret_e : NULL;
> }
>
> +/* Return true if OP is invariant. */
> +
> +static bool
> +loop_invariant_op_p (class loop *loop,
> + tree op)
> +{
> + if (is_gimple_min_invariant (op))
> + return true;
> + if (SSA_NAME_IS_DEFAULT_DEF (op)
> + || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
> + return true;
> + return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
> +}
> +
> +/* Return true if OP looks like it is derived from IV. */
> +
> +static bool
> +loop_iv_derived_p (class loop *loop,
> + tree op)
> +{
> + /* Always check for invariant first. */
> + gcc_checking_assert (!is_gimple_min_invariant (op)
> + && !SSA_NAME_IS_DEFAULT_DEF (op)
> + && flow_bb_inside_loop_p (loop,
> + gimple_bb (SSA_NAME_DEF_STMT (op))));
> + return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
> +}
> +
> /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
> instructions should be duplicated, limit is decreased by the actual
> amount. */
>
> static bool
> should_duplicate_loop_header_p (basic_block header, class loop *loop,
> - int *limit)
> + int *limit,
> + hash_set <edge> *invariant_exits)
> {
> gimple_stmt_iterator bsi;
>
> @@ -152,15 +181,20 @@ should_duplicate_loop_header_p (basic_block header,
> class loop *loop,
>
> for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
> gsi_next (&psi))
> - {
> - gphi *phi = psi.phi ();
> - tree res = gimple_phi_result (phi);
> - if (INTEGRAL_TYPE_P (TREE_TYPE (res))
> - || POINTER_TYPE_P (TREE_TYPE (res)))
> - gimple_set_uid (phi, 1 /* IV */);
> - else
> - gimple_set_uid (phi, 0);
> - }
> + /* If this is actual loop header PHIs indicate that the SSA_NAME
> + may be IV. Otherwise just give up. */
> + if (header == loop->header)
> + {
> + gphi *phi = psi.phi ();
> + tree res = gimple_phi_result (phi);
> + if (INTEGRAL_TYPE_P (TREE_TYPE (res))
> + || POINTER_TYPE_P (TREE_TYPE (res)))
> + gimple_set_uid (phi, 1 /* IV */);
> + else
> + gimple_set_uid (phi, 0);
> + }
> + else
> + gimple_set_uid (psi.phi (), 0);
>
> /* Count number of instructions and punt on calls.
> Populate stmts INV/IV flag to later apply heuristics to the
> @@ -201,21 +235,26 @@ should_duplicate_loop_header_p (basic_block header,
> class loop *loop,
> /* Classify the stmt based on whether its computation is based
> on a IV or whether it is invariant in the loop. */
> gimple_set_uid (last, 0);
> - if (!gimple_vuse (last))
> + if (!gimple_vuse (last)
> + && gimple_code (last) != GIMPLE_ASM
> + && (gimple_code (last) != GIMPLE_CALL
> + || gimple_call_flags (last) & ECF_CONST))
> {
> bool inv = true;
> - bool iv = false;
> + bool iv = true;
> ssa_op_iter i;
> tree op;
> FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
> - if (!SSA_NAME_IS_DEFAULT_DEF (op)
> - && flow_bb_inside_loop_p (loop,
> - gimple_bb (SSA_NAME_DEF_STMT (op))))
> + if (!loop_invariant_op_p (loop, op))
> {
> - if (!(gimple_uid (SSA_NAME_DEF_STMT (op)) & 2 /* INV */))
> + if (!loop_iv_derived_p (loop, op))
> + {
> + inv = false;
> + iv = false;
> + break;
> + }
> + else
> inv = false;
> - if (gimple_uid (SSA_NAME_DEF_STMT (op)) & 1 /* IV */)
> - iv = true;
> }
> gimple_set_uid (last, (iv ? 1 : 0) | (inv ? 2 : 0));
> }
> @@ -225,16 +264,32 @@ should_duplicate_loop_header_p (basic_block header,
> class loop *loop,
> the loop further. Unless this is the original loop header. */
> tree lhs = gimple_cond_lhs (last);
> tree rhs = gimple_cond_rhs (last);
> + bool lhs_invariant = loop_invariant_op_p (loop, lhs);
> + bool rhs_invariant = loop_invariant_op_p (loop, rhs);
> + if (lhs_invariant && rhs_invariant)
> + {
> + if (invariant_exits)
> + {
> + edge e;
> + edge_iterator ei;
> + FOR_EACH_EDGE (e, ei, header->succs)
> + if (loop_exit_edge_p (loop, e))
> + {
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + fprintf (dump_file,
> + " Will elliminate invariant exit %i->%i\n",
> + e->src->index, e->dest->index);
> + invariant_exits->add (e);
> + }
> + }
> + return true;
> + }
> + /* TODO: This is heuristics that claims that IV based ocnditionals will
> + likely be optimized out in duplicated header. We could use ranger
> + query instead to tell this more precisely. */
> if (header != loop->header
> - && ((TREE_CODE (lhs) == SSA_NAME
> - && !SSA_NAME_IS_DEFAULT_DEF (lhs)
> - && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (lhs)))
> - && gimple_uid (SSA_NAME_DEF_STMT (lhs)) == 0)
> - || (TREE_CODE (rhs) == SSA_NAME
> - && !SSA_NAME_IS_DEFAULT_DEF (rhs)
> - && flow_bb_inside_loop_p (loop,
> - gimple_bb (SSA_NAME_DEF_STMT (rhs)))
> - && gimple_uid (SSA_NAME_DEF_STMT (rhs)) == 0)))
> + && ((!lhs_invariant && !loop_iv_derived_p (loop, lhs))
> + || (!rhs_invariant && !loop_iv_derived_p (loop, rhs))))
> {
> if (dump_file && (dump_flags & TDF_DETAILS))
> fprintf (dump_file,
> @@ -452,7 +507,7 @@ ch_base::copy_headers (function *fun)
> continue;
> }
>
> - if (should_duplicate_loop_header_p (header, loop, &remaining_limit))
> + if (should_duplicate_loop_header_p (header, loop, &remaining_limit,
> NULL))
> candidates.safe_push ({loop, static_exit});
> }
> /* Do not use ranger after we change the IL and not have updated SSA. */
> @@ -482,10 +537,12 @@ ch_base::copy_headers (function *fun)
> profile_count entry_count = profile_count::zero ();
> edge e;
> edge_iterator ei;
> + hash_set <edge> invariant_exits;
> FOR_EACH_EDGE (e, ei, loop->header->preds)
> if (e->src != loop->latch)
> entry_count += e->count ();
> - while (should_duplicate_loop_header_p (header, loop, &remaining_limit))
> + while (should_duplicate_loop_header_p (header, loop, &remaining_limit,
> + &invariant_exits))
> {
> if (dump_file && (dump_flags & TDF_DETAILS))
> fprintf (dump_file, " Will duplicate bb %i\n", header->index);
> @@ -537,7 +594,8 @@ ch_base::copy_headers (function *fun)
>
> propagate_threaded_block_debug_into (exit->dest, entry->dest);
> if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs,
> - true, candidate.static_exit))
> + true, candidate.static_exit,
> + &invariant_exits))
> {
> if (dump_file && (dump_flags & TDF_DETAILS))
> fprintf (dump_file, "Duplication failed.\n");
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c
> b/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c
> index 16340868abf..bfb0f17284d 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-ch-profile-1.c
> @@ -9,4 +9,4 @@ void test(int v, int q)
> /* { dg-final { scan-tree-dump-not "Invalid sum" "ch2"} } */
> /* dom2 optimizes out the redundant test for loop invariant v/q
> which leads to inconsistent profile. */
> -/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" { xfail *-*-*
> }} } */
> +/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)