On Fri, 7 Jul 2023, Tamar Christina wrote:
> Hi All,
>
> Following on from Jakub's patch in g:de0ee9d14165eebb3d31c84e98260c05c3b33acb
> these two patches finishes the work fixing the regression and improves
> codegen.
>
> As explained in that commit, ifconvert sorts PHI args in increasing number of
> occurrences in order to reduce the number of comparisons done while
> traversing the tree.
>
> The remaining task that this patch fixes is dealing with the long chain of
> comparisons that can be created from phi nodes, particularly when they share
> any common successor (classical example is a diamond node).
>
> on a PHI-node the true and else branches carry a condition, true will
> carry `a` and false `~a`. The issue is that at the moment GCC tests both `a`
> and `~a` when the phi node has more than 2 arguments. Clearly this isn't
> needed. The deeper the nesting of phi nodes the larger the repetition.
>
> As an example, for
>
> foo (int *f, int d, int e)
> {
> for (int i = 0; i < 1024; i++)
> {
> int a = f[i];
> int t;
> if (a < 0)
> t = 1;
> else if (a < e)
> t = 1 - a * d;
> else
> t = 0;
> f[i] = t;
> }
> }
>
> after Jakub's patch we generate:
>
> _7 = a_10 < 0;
> _21 = a_10 >= 0;
> _22 = a_10 < e_11(D);
> _23 = _21 & _22;
> _ifc__42 = _23 ? t_13 : 0;
> t_6 = _7 ? 1 : _ifc__42
>
> but while better than before it is still inefficient, since in the false
> branch, where we know ~_7 is true, we still test _21.
>
> This leads to superfluous tests for every diamond node. After this patch we
> generate
>
> _7 = a_10 < 0;
> _22 = a_10 < e_11(D);
> _ifc__42 = _22 ? t_13 : 0;
> t_6 = _7 ? 1 : _ifc__42;
>
> Which correctly elides the test of _21. This is done by borrowing the
> vectorizer's helper functions to limit predicate mask usages. Ifcvt will
> chain
> conditionals on the false edge (unless specifically inverted) so this patch on
> creating cond a ? b : c, will register ~a when traversing c. If c is a
> conditional then c will be simplified to the smaller possible predicate given
> the assumptions we already know to be true.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Not sure how to write a non-fragile testcase for this as the
> conditionals chosen depends on threading etc. Any Suggestions?
>
> Ok for master?
OK.
For a testcase I wonder if you can produce a GIMPLE FE one starting
with pass_fix_loops? (I think it's still not possible to start
when in LC SSA)
Thanks,
Richard.
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/109154
> * tree-if-conv.cc (gen_simplified_condition,
> gen_phi_nest_statement): New.
> (gen_phi_arg_condition, predicate_scalar_phi): Use it.
>
> --- inline copy of patch --
> diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
> index
> e342532a343a3c066142adeec5fdfaf736a653e5..16b36dd8b0226f796c1a3fc6d45a9059385e812b
> 100644
> --- a/gcc/tree-if-conv.cc
> +++ b/gcc/tree-if-conv.cc
> @@ -1870,12 +1870,44 @@ convert_scalar_cond_reduction (gimple *reduc,
> gimple_stmt_iterator *gsi,
> return rhs;
> }
>
> +/* Generate a simplified conditional. */
> +
> +static tree
> +gen_simplified_condition (tree cond, scalar_cond_masked_set_type &cond_set)
> +{
> + /* Check if the value is already live in a previous branch. This resolves
> + nested conditionals from diamond PHI reductions. */
> + if (TREE_CODE (cond) == SSA_NAME)
> + {
> + gimple *stmt = SSA_NAME_DEF_STMT (cond);
> + gassign *assign = NULL;
> + if ((assign = as_a <gassign *> (stmt))
> + && gimple_assign_rhs_code (assign) == BIT_AND_EXPR)
> + {
> + tree arg1 = gimple_assign_rhs1 (assign);
> + tree arg2 = gimple_assign_rhs2 (assign);
> + if (cond_set.contains ({ arg1, 1 }))
> + arg1 = boolean_true_node;
> + else
> + arg1 = gen_simplified_condition (arg1, cond_set);
> +
> + if (cond_set.contains ({ arg2, 1 }))
> + arg2 = boolean_true_node;
> + else
> + arg2 = gen_simplified_condition (arg2, cond_set);
> +
> + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, arg1, arg2);
> + }
> + }
> + return cond;
> +}
> +
> /* Produce condition for all occurrences of ARG in PHI node. Set *INVERT
> as to whether the condition is inverted. */
>
> static tree
> -gen_phi_arg_condition (gphi *phi, vec<int> *occur,
> - gimple_stmt_iterator *gsi, bool *invert)
> +gen_phi_arg_condition (gphi *phi, vec<int> *occur, gimple_stmt_iterator *gsi,
> + scalar_cond_masked_set_type &cond_set, bool *invert)
> {
> int len;
> int i;
> @@ -1902,6 +1934,8 @@ gen_phi_arg_condition (gphi *phi, vec<int> *occur,
> c = TREE_OPERAND (c, 0);
> *invert = true;
> }
> +
> + c = gen_simplified_condition (c, cond_set);
> c = force_gimple_operand_gsi (gsi, unshare_expr (c),
> true, NULL_TREE, true, GSI_SAME_STMT);
> if (cond != NULL_TREE)
> @@ -1913,11 +1947,79 @@ gen_phi_arg_condition (gphi *phi, vec<int> *occur,
> }
> else
> cond = c;
> +
> + /* Register the new possibly simplified conditional. When more than 2
> + entries in a phi node we chain entries in the false branch, so the
> + inverted condition is active. */
> + scalar_cond_masked_key pred_cond ({ cond, 1 });
> + if (!invert)
> + pred_cond.inverted_p = !pred_cond.inverted_p;
> + cond_set.add (pred_cond);
> }
> gcc_assert (cond != NULL_TREE);
> return cond;
> }
>
> +/* Create the smallest nested conditional possible. On pre-order we record
> + which conditionals are live, and on post-order rewrite the chain by
> removing
> + already active conditions.
> +
> + As an example we simplify:
> +
> + _7 = a_10 < 0;
> + _21 = a_10 >= 0;
> + _22 = a_10 < e_11(D);
> + _23 = _21 & _22;
> + _ifc__42 = _23 ? t_13 : 0;
> + t_6 = _7 ? 1 : _ifc__42
> +
> + into
> +
> + _7 = a_10 < 0;
> + _22 = a_10 < e_11(D);
> + _ifc__42 = _22 ? t_13 : 0;
> + t_6 = _7 ? 1 : _ifc__42;
> +
> + which produces better code. */
> +
> +static tree
> +gen_phi_nest_statement (gphi *phi, gimple_stmt_iterator *gsi,
> + scalar_cond_masked_set_type &cond_set, tree type,
> + hash_map<tree_operand_hash, auto_vec<int>> &phi_arg_map,
> + gimple **res_stmt, tree lhs0, vec<tree> &args,
> + unsigned idx)
> +{
> + if (idx == args.length ())
> + return args[idx - 1];
> +
> + vec<int> *indexes = phi_arg_map.get (args[idx - 1]);
> + bool invert;
> + tree cond = gen_phi_arg_condition (phi, indexes, gsi, cond_set, &invert);
> + tree arg1 = gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map,
> + res_stmt, lhs0, args, idx + 1);
> +
> + unsigned prev = idx;
> + unsigned curr = prev - 1;
> + tree arg0 = args[curr];
> + tree rhs, lhs;
> + if (idx > 1)
> + lhs = make_temp_ssa_name (type, NULL, "_ifc_");
> + else
> + lhs = lhs0;
> +
> + if (invert)
> + rhs = fold_build_cond_expr (type, unshare_expr (cond),
> + arg1, arg0);
> + else
> + rhs = fold_build_cond_expr (type, unshare_expr (cond),
> + arg0, arg1);
> + gassign *new_stmt = gimple_build_assign (lhs, rhs);
> + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> + update_stmt (new_stmt);
> + *res_stmt = new_stmt;
> + return lhs;
> +}
> +
> /* Replace a scalar PHI node with a COND_EXPR using COND as condition.
> This routine can handle PHI nodes with more than two arguments.
>
> @@ -1968,6 +2070,8 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> }
>
> bb = gimple_bb (phi);
> + /* Keep track of conditionals already seen. */
> + scalar_cond_masked_set_type cond_set;
> if (EDGE_COUNT (bb->preds) == 2)
> {
> /* Predicate ordinary PHI node with 2 arguments. */
> @@ -1976,6 +2080,7 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> first_edge = EDGE_PRED (bb, 0);
> second_edge = EDGE_PRED (bb, 1);
> cond = bb_predicate (first_edge->src);
> + cond_set.add ({ cond, 1 });
> if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
> std::swap (first_edge, second_edge);
> if (EDGE_COUNT (first_edge->src->succs) > 1)
> @@ -1988,7 +2093,9 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> }
> else
> cond = bb_predicate (first_edge->src);
> +
> /* Gimplify the condition to a valid cond-expr conditonal operand. */
> + cond = gen_simplified_condition (cond, cond_set);
> cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true,
> NULL_TREE, true, GSI_SAME_STMT);
> true_bb = first_edge->src;
> @@ -2110,31 +2217,9 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator
> *gsi)
> else
> {
> /* Common case. */
> - vec<int> *indexes;
> tree type = TREE_TYPE (gimple_phi_result (phi));
> - tree lhs;
> - arg1 = args[args_len - 1];
> - for (i = args_len - 1; i > 0; i--)
> - {
> - arg0 = args[i - 1];
> - indexes = phi_arg_map.get (args[i - 1]);
> - if (i != 1)
> - lhs = make_temp_ssa_name (type, NULL, "_ifc_");
> - else
> - lhs = res;
> - bool invert;
> - cond = gen_phi_arg_condition (phi, indexes, gsi, &invert);
> - if (invert)
> - rhs = fold_build_cond_expr (type, unshare_expr (cond),
> - arg1, arg0);
> - else
> - rhs = fold_build_cond_expr (type, unshare_expr (cond),
> - arg0, arg1);
> - new_stmt = gimple_build_assign (lhs, rhs);
> - gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
> - update_stmt (new_stmt);
> - arg1 = lhs;
> - }
> + gen_phi_nest_statement (phi, gsi, cond_set, type, phi_arg_map,
> + &new_stmt, res, args, 1);
> }
>
> if (dump_file && (dump_flags & TDF_DETAILS))
>
>
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)