On Fri, 7 Jul 2023, Tamar Christina wrote:
> Hi All,
>
> This patch builds on the previous patch by fixing another issue with the
> way ifcvt currently picks which branches to test.
>
> The issue with the current implementation is while it sorts for
> occurrences of the argument, it doesn't check for complexity of the arguments.
>
> As an example:
>
> <bb 15> [local count: 528603100]:
> ...
> if (distbb_75 >= 0.0)
> goto <bb 17>; [59.00%]
> else
> goto <bb 16>; [41.00%]
>
> <bb 16> [local count: 216727269]:
> ...
> goto <bb 19>; [100.00%]
>
> <bb 17> [local count: 311875831]:
> ...
> if (distbb_75 < iftmp.0_98)
> goto <bb 18>; [20.00%]
> else
> goto <bb 19>; [80.00%]
>
> <bb 18> [local count: 62375167]:
> ...
>
> <bb 19> [local count: 528603100]:
> # prephitmp_175 = PHI <_173(18), 0.0(17), _174(16)>
>
> All tree arguments to the PHI have the same number of occurrences, namely 1,
> however it makes a big difference which comparison we test first.
>
> Sorting only on occurrences we'll pick the compares coming from BB 18 and BB
> 17,
> This means we end up generating 4 comparisons, while 2 would have been enough.
>
> By keeping track of the "complexity" of the COND in each BB, (i.e. the number
> of comparisons needed to traverse from the start [BB 15] to end [BB 19]) and
> using a key tuple of <occurrences, complexity> we end up selecting the compare
> from BB 16 and BB 18 first. BB 16 only requires 1 compare, and BB 18, after
> we
> test BB 16 also only requires one additional compare. This change paired with
> the one previous above results in the optimal 2 compares.
>
> For deep nesting, i.e. for
>
> ...
> _79 = vr_15 > 20;
> _80 = _68 & _79;
> _82 = vr_15 <= 20;
> _83 = _68 & _82;
> _84 = vr_15 < -20;
> _85 = _73 & _84;
> _87 = vr_15 >= -20;
> _88 = _73 & _87;
> _ifc__111 = _55 ? 10 : 12;
> _ifc__112 = _70 ? 7 : _ifc__111;
> _ifc__113 = _85 ? 8 : _ifc__112;
> _ifc__114 = _88 ? 9 : _ifc__113;
> _ifc__115 = _45 ? 1 : _ifc__114;
> _ifc__116 = _63 ? 3 : _ifc__115;
> _ifc__117 = _65 ? 4 : _ifc__116;
> _ifc__118 = _83 ? 6 : _ifc__117;
> _ifc__119 = _60 ? 2 : _ifc__118;
> _ifc__120 = _43 ? 13 : _ifc__119;
> _ifc__121 = _75 ? 11 : _ifc__120;
> vw_1 = _80 ? 5 : _ifc__121;
>
> Most of the comparisons are still needed because the chain of
> occurrences to not negate eachother. i.e. _80 is _73 & vr_15 >= -20 and
> _85 is _73 & vr_15 < -20. clearly given _73 needs to be true in both
> branches,
> the only additional test needed is on vr_15, where the one test is the
> negation
> of the other. So we don't need to do the comparison of _73 twice.
>
> The changes in the patch reduces the overall number of compares by one, but
> has
> a bigger effect on the dependency chain.
>
> Previously we would generate 5 instructions chain:
>
> cmple p7.s, p4/z, z29.s, z30.s
> cmpne p7.s, p7/z, z29.s, #0
> cmple p6.s, p7/z, z31.s, z30.s
> cmpge p6.s, p6/z, z27.s, z25.s
> cmplt p15.s, p6/z, z28.s, z21.s
>
> as the longest chain. With this patch we generate 3:
>
> cmple p7.s, p3/z, z27.s, z30.s
> cmpne p7.s, p7/z, z27.s, #0
> cmpgt p7.s, p7/z, z31.s, z30.s
>
> and I don't think (x <= y) && (x != 0) && (z > y) can be reduced further.
>
> Bootstrapped and Regtested on aarch64-none-linux-gnu and no issues.
>
> Not sure how to write a non-fragile testcase for this as the
> conditionals chosen depends on threading etc. Any Suggestions?
>
> Ok for master?
OK.
Likewise for the testcase - GIMPLE one starting at fix_loops.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/109154
> * tree-if-conv.cc (INCLUDE_ALGORITHM): Include.
> (struct bb_predicate): Add no_predicate_stmts.
> (set_bb_predicate): Increase predicate count.
> (set_bb_predicate_gimplified_stmts): Conditionally initialize
> no_predicate_stmts.
> (get_bb_num_predicate_stmts): New.
> (init_bb_predicate): Initialzie no_predicate_stmts.
> (release_bb_predicate): Cleanup no_predicate_stmts.
> (insert_gimplified_predicates): Preserve no_predicate_stmts.
>
> --- inline copy of patch --
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index
> 16b36dd8b0226f796c1a3fc6d45a9059385e812b..0ed50d99c46f99a4d1ea0e827ee2b2a3f494b2da
> 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -80,6 +80,7 @@ along with GCC; see the file COPYING3. If not see
> <L18>:;
> */
>
> +#define INCLUDE_ALGORITHM
> #include "config.h"
> #include "system.h"
> #include "coretypes.h"
> @@ -231,6 +232,10 @@ struct bb_predicate {
> recorded here, in order to avoid the duplication of computations
> that occur in previous conditions. See PR44483. */
> gimple_seq predicate_gimplified_stmts;
> +
> + /* Records the number of statements recorded into
> + PREDICATE_GIMPLIFIED_STMTS. */
> + unsigned no_predicate_stmts;
> };
>
> /* Returns true when the basic block BB has a predicate. */
> @@ -254,10 +259,16 @@ bb_predicate (basic_block bb)
> static inline void
> set_bb_predicate (basic_block bb, tree cond)
> {
> + auto aux = (struct bb_predicate *) bb->aux;
> gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
> && is_gimple_val (TREE_OPERAND (cond, 0)))
> || is_gimple_val (cond));
> - ((struct bb_predicate *) bb->aux)->predicate = cond;
> + aux->predicate = cond;
> + aux->no_predicate_stmts++;
> +
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + fprintf (dump_file, "Recording block %d value %d\n", bb->index,
> + aux->no_predicate_stmts);
> }
>
> /* Returns the sequence of statements of the gimplification of the
> @@ -270,12 +281,16 @@ bb_predicate_gimplified_stmts (basic_block bb)
> }
>
> /* Sets the sequence of statements STMTS of the gimplification of the
> - predicate for basic block BB. */
> + predicate for basic block BB. If PRESERVE_COUNTS then don't clear the
> predicate
> + counts. */
>
> static inline void
> -set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
> +set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts,
> + bool preserve_counts)
> {
> ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
> + if (stmts == NULL && !preserve_counts)
> + ((struct bb_predicate *) bb->aux)->no_predicate_stmts = 0;
> }
>
> /* Adds the sequence of statements STMTS to the sequence of statements
> @@ -296,18 +311,28 @@ add_bb_predicate_gimplified_stmts (basic_block bb,
> gimple_seq stmts)
> gimple *stmt = gsi_stmt (gsi);
> delink_stmt_imm_use (stmt);
> gimple_set_modified (stmt, true);
> + ((struct bb_predicate *) bb->aux)->no_predicate_stmts++;
> }
> gimple_seq_add_seq_without_update
> (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts),
> stmts);
> }
>
> +/* Return the number of statements the predicate of the basic block consists
> + of. */
> +
> +static inline unsigned
> +get_bb_num_predicate_stmts (basic_block bb)
> +{
> + return ((struct bb_predicate *) bb->aux)->no_predicate_stmts;
> +}
> +
> /* Initializes to TRUE the predicate of basic block BB. */
>
> static inline void
> init_bb_predicate (basic_block bb)
> {
> bb->aux = XNEW (struct bb_predicate);
> - set_bb_predicate_gimplified_stmts (bb, NULL);
> + set_bb_predicate_gimplified_stmts (bb, NULL, false);
> set_bb_predicate (bb, boolean_true_node);
> }
>
> @@ -327,7 +352,7 @@ release_bb_predicate (basic_block bb)
>
> /* Discard them. */
> gimple_seq_discard (stmts);
> - set_bb_predicate_gimplified_stmts (bb, NULL);
> + set_bb_predicate_gimplified_stmts (bb, NULL, false);
> }
> }
>
> @@ -2041,7 +2066,6 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> tree rhs, res, arg0, arg1, op0, op1, scev;
> tree cond;
> unsigned int index0;
> - unsigned int max, args_len;
> edge e;
> basic_block bb;
> unsigned int i;
> @@ -2145,7 +2169,6 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> bool swap = false;
> hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
> unsigned int num_args = gimple_phi_num_args (phi);
> - int max_ind = -1;
> /* Vector of different PHI argument values. */
> auto_vec<tree> args (num_args);
>
> @@ -2160,28 +2183,38 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> phi_arg_map.get_or_insert (arg).safe_push (i);
> }
>
> - /* Determine element with max number of occurrences. */
> - max_ind = -1;
> - max = 1;
> - args_len = args.length ();
> - for (i = 0; i < args_len; i++)
> + /* Determine element with max number of occurrences and complexity.
> Looking at only
> + number of occurrences as a measure for complexity isn't enough as all
> usages can
> + be unique but the comparisons to reach the PHI node differ per branch.
> */
> + typedef std::pair <tree, std::pair <unsigned, unsigned>> ArgEntry;
> + auto_vec<ArgEntry> argsKV;
> + for (i = 0; i < args.length (); i++)
> {
> - unsigned int len;
> - if ((len = phi_arg_map.get (args[i])->length ()) > max)
> + unsigned int len = 0;
> + for (int index : phi_arg_map.get (args[i]))
> {
> - max_ind = (int) i;
> - max = len;
> + edge e = gimple_phi_arg_edge (phi, index);
> + len += get_bb_num_predicate_stmts (e->src);
> }
> +
> + unsigned occur = phi_arg_map.get (args[i])->length ();
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + fprintf (dump_file, "Ranking %d as len=%d, idx=%d\n", i, len, occur);
> + argsKV.safe_push ({ args[i], { len, occur }});
> }
>
> - /* Put element with max number of occurences to the end of ARGS. */
> - if (max_ind != -1 && max_ind + 1 != (int) args_len)
> - std::swap (args[args_len - 1], args[max_ind]);
> + /* Sort elements based on rankings ARGS. */
> + std::sort(argsKV.begin(), argsKV.end(), [](ArgEntry &left, ArgEntry
> &right) {
> + return left.second < right.second;
> + });
> +
> + for (i = 0; i < args.length (); i++)
> + args[i] = argsKV[i].first;
>
> /* Handle one special case when number of arguments with different values
> is equal 2 and one argument has the only occurrence. Such PHI can be
> handled as if would have only 2 arguments. */
> - if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
> + if (args.length () == 2 && phi_arg_map.get (args[0])->length () == 1)
> {
> vec<int> *indexes;
> indexes = phi_arg_map.get (args[0]);
> @@ -2317,7 +2350,7 @@ insert_gimplified_predicates (loop_p loop)
> }
>
> /* Once the sequence is code generated, set it to NULL. */
> - set_bb_predicate_gimplified_stmts (bb, NULL);
> + set_bb_predicate_gimplified_stmts (bb, NULL, true);
> }
> }
> }
>
>
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)