Currently, whenever it can, scev-cprop unconditionally replaces a loop closed ssa PHI with an expression built from the loop initial value and the loop niter, which might cause redundant code-gen when all interior computations related to the IV inside the loop are also necessary.  For example, for the case below:

  p = init_addr;

  for (i = 0; i < N; i++)
    {
      p++;
      *p = ...;
    }

  . = p;

scev-cprop would end up with:

  p = init_addr;

  for (i = 0; i < N; i++)
    {
      p++;
      *p = ...;
    }

  . = init_addr + N;   // Redundant computation

For a bitmask-manipulation loop, this may result in an even more costly re-evaluation, such as a popcount.
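To make the popcount remark concrete, here is a sketch of the kind of loop meant (the function and names below are made up for illustration, they are not taken from the patch or its testsuite):

  int
  count_and_store_bits (unsigned mask, unsigned *out)
  {
    int count = 0;
    while (mask)
      {
        mask &= mask - 1;  /* clear the lowest set bit */
        *out++ = mask;     /* interior value is stored, so the loop body
                              stays necessary */
        count++;
      }
    return count;          /* currently the exit value of "count" is still
                              replaced by a popcount-style evaluation of the
                              initial mask, re-doing the work the retained
                              loop already performs */
  }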
To address the issue, we need a means, like the statement necessity propagation used in DCE, to figure out whether the impacted IVs are really needed.  As pointed out by Richard, we could wire scev-cprop into DCE, and that is what this patch does.  One difference is that we retain the scev-cprop pass and extend its option to cover both the new behavior (-ftree-scev-cprop[=1], the default) and the original one (-ftree-scev-cprop=2).

In practice, I think the new way gives us more compact and faster code in most situations.  However, the original handling might occasionally be better, because the replacement can enable folding of statements following the loop, for example:

  p = init_addr;

  for (i = 0; i < N; i++)
    {
      p++;
      *p = ...;
    }

  p1 = p;
  ...
  a = p1 - init_addr;   // a = (init_addr + N) - init_addr = N
  b = p1 - N;           // b = (init_addr + N) - N = init_addr

It is hard to take this into account in a cost model, since a global check for folding opportunities might be time-consuming, and the above case is not that common.  Therefore, as a backup, we keep the original behavior available, so that users can enable it when their code matches that scenario.

Thanks,
Feng

---
gcc/
	PR tree-optimization/90594
	* common.opt (ftree-scev-cprop=): New option.
	(ftree-scev-cprop): Change it to be an alias of ftree-scev-cprop=.
	* tree-scalar-evolution.cc (simple_scev_final_value): Make it a
	global function.
	(apply_scev_final_value_replacement): Likewise.
	* tree-scalar-evolution.h (scev_const_prop): Remove declaration.
	(simple_scev_final_value): Add new declaration.
	(apply_scev_final_value_replacement): Likewise.
	* tree-ssa-dce.cc (stmt_stats): Add new field sccp_replaced_phis.
	(scev_cprop_entry): New struct.
	(scev_cprop_level): New static variable.
	(scev_cprop_map): Likewise.
	(mark_expr_operand_necessary): New function.
	(get_loop_closed_phi_scev_replacement): Likewise.
	(propagate_necessity): Change necessity propagation for loop closed
	phis when scev-cprop is enabled.
	(fold_scev_cprop_entry): New function.
	(remove_dead_phis): Rename to replace_or_remove_phis, and do scev
	final value replacement for loop closed phis.
	(eliminate_unnecessary_stmts): Call replace_or_remove_phis.
	(print_stats): Print stats for replaced phis.
	(tree_dce_init): Initialize scev_cprop_map.
	(tree_dce_done): Delete scev_cprop_map.
	(perform_tree_ssa_dce): Make it a global function.  Add scev-cprop
	specific handling.
	* tree-ssa-dce.h (perform_tree_ssa_dce): Add new declaration.
	* tree-ssa-loop.cc (pass_scev_cprop::execute): Call
	perform_tree_ssa_dce.
---
 gcc/common.opt               |   9 +-
 gcc/tree-scalar-evolution.cc |   6 +-
 gcc/tree-scalar-evolution.h  |   3 +-
 gcc/tree-ssa-dce.cc          | 251 ++++++++++++++++++++++++++++++++---
 gcc/tree-ssa-dce.h           |   1 +
 gcc/tree-ssa-loop.cc         |   8 +-
 6 files changed, 249 insertions(+), 29 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index a42537c5f1e..98210ed72fd 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3425,8 +3425,15 @@ ftree-vect-loop-version
 Common Ignore
 Does nothing. Preserved for backward compatibility.
 
+; If this option is 1, only perform scev cprop when all statements that
+; evaluate the related IV inside the loop could be eliminated; if it is 2,
+; perform scev cprop unconditionally.
+ftree-scev-cprop=
+Common Joined RejectNegative UInteger Var(flag_tree_scev_cprop) Init(1) Optimization IntegerRange(0, 2)
+Enable copy propagation of scalar-evolution information.
+
 ftree-scev-cprop
-Common Var(flag_tree_scev_cprop) Init(1) Optimization
+Common Alias(ftree-scev-cprop=,1,0)
 Enable copy propagation of scalar-evolution information.
 
 ftrivial-auto-var-init=
diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index 5733632aa78..9e51b18b237 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3381,7 +3381,7 @@ scev_finalize (void)
 }
 
 /* Returns true if the expression EXPR is considered to be too expensive
-   for scev_const_prop.  Sets *COND_OVERFLOW_P to true when the
+   for scev const prop.  Sets *COND_OVERFLOW_P to true when the
    expression might contain a sub-expression that is subject to undefined
    overflow behavior and conditionally evaluated.  */
 
@@ -3782,7 +3782,7 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
    final value to avoid overflow UB when replacement would really happen
    later.  */
 
-static bool
+bool
 simple_scev_final_value (class loop *loop, tree value, tree *final_value,
                          bool *rewrite_overflow)
 {
@@ -3877,7 +3877,7 @@ simple_scev_final_value (class loop *loop, tree value, tree *final_value,
    have to leave an unused copy assignment around, if so, ALWAYS_KEEP is
    set to true.  */
 
-static void
+void
 apply_scev_final_value_replacement (gphi *phi, tree final_value,
                                     bool rewrite_overflow, bool always_keep)
 {
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index 41ba6b237b5..4510e0c52e9 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -35,7 +35,8 @@ extern tree instantiate_scev (edge, class loop *, tree);
 extern tree resolve_mixers (class loop *, tree, bool *);
 extern void gather_stats_on_scev_database (void);
 extern bool final_value_replacement_loop (class loop *);
-extern unsigned int scev_const_prop (void);
+extern bool simple_scev_final_value (class loop *, tree, tree *, bool *);
+extern void apply_scev_final_value_replacement (gphi *, tree, bool, bool);
 extern bool expression_expensive_p (tree, bool *);
 extern bool simple_iv_with_niters (class loop *, class loop *, tree,
                                    struct affine_iv *, tree *, bool);
diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc
index ad3ac2785cf..5a667505dfb 100644
--- a/gcc/tree-ssa-dce.cc
+++ b/gcc/tree-ssa-dce.cc
@@ -59,8 +59,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "gimplify.h"
 #include "gimple-iterator.h"
+#include "gimplify-me.h"
 #include "tree-cfg.h"
 #include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop-manip.h"
 #include "tree-into-ssa.h"
 #include "tree-dfa.h"
 #include "cfgloop.h"
@@ -77,8 +79,16 @@ static struct stmt_stats
   int total_phis;
   int removed;
   int removed_phis;
+  int sccp_replaced_phis;
 } stats;
 
+struct scev_cprop_entry
+{
+  tree value;
+  bool rewrite_overflow;
+  bool visited;
+};
+
 #define STMT_NECESSARY GF_PLF_1
 
 static vec<gimple *> worklist;
@@ -94,6 +104,14 @@ static sbitmap last_stmt_necessary;
 /* Vector indicating that BB contains statements that are live.  */
 static sbitmap bb_contains_live_stmts;
 
+/* Control level of scev cprop.  */
+static int scev_cprop_level;
+
+/* For a loop closed PHI, if the induction variable at loop exit could be
+   calculated using initial value and loop niter, we would add a map between
+   definition of the PHI and the induction final value.  */
+static hash_map<tree, scev_cprop_entry> *scev_cprop_map;
+
 /* Before we can determine whether a control branch is dead, we need to
    compute which blocks are control dependent on which edges.
 
@@ -241,6 +259,18 @@ mark_operand_necessary (tree op)
   worklist.safe_push (stmt);
 }
 
+/* Mark the operand stored in TP as necessary if it is an ssa name.  */
+
+tree
+mark_expr_operand_necessary (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED,
+                             void *data ATTRIBUTE_UNUSED)
+{
+  if (TREE_CODE (*tp) == SSA_NAME)
+    mark_operand_necessary (*tp);
+
+  return NULL_TREE;
+}
+
 /* Return true if STMT is a call to allocation function that can be
    optimized out if the memory block is never used for anything else then
    NULL pointer check or free.
@@ -765,6 +795,49 @@ valid_new_delete_pair_p (gimple *new_call, gimple *delete_call)
   return valid_new_delete_pair_p (new_asm, delete_asm);
 }
 
+/* Given a PHI, check if it is a loop closed PHI, and whether the related
+   induction variable has a simple final value that could be directly
+   calculated from its initial value and the loop niter.  If satisfied,
+   insert a new entry to describe this when ADD_NEW is true, or return the
+   existing matched entry when ADD_NEW is false; otherwise, return NULL.  */
+
+static scev_cprop_entry *
+get_loop_closed_phi_scev_replacement (gphi *phi, bool add_new)
+{
+  /* Do nothing if scev cprop is not enabled or this is definitely
+     not a loop closed PHI.  */
+  if (scev_cprop_level < 1 || gimple_phi_num_args (phi) != 1)
+    return NULL;
+
+  tree result = gimple_phi_result (phi);
+
+  if (!add_new)
+    return scev_cprop_map->get (result);
+
+  edge exit = single_pred_edge (gimple_bb (phi));
+  tree arg = gimple_phi_arg_def (phi, 0);
+
+  if (TREE_CODE (arg) != SSA_NAME)
+    return NULL;
+
+  class loop *loop = exit->src->loop_father;
+  scev_cprop_entry entry = {};
+  bool existed;
+
+  if (!simple_scev_final_value (loop, arg, &entry.value,
+                                &entry.rewrite_overflow))
+    return NULL;
+
+  auto &new_entry = scev_cprop_map->get_or_insert (result, &existed);
+
+  /* Based on the way of DCE propagation, an expression would not be handled
+     more than once.  */
+  gcc_assert (!existed);
+  new_entry = entry;
+
+  return &new_entry;
+}
+
 /* Propagate necessity using the operands of necessary statements.
    Process the uses on each statement in the worklist, and add all
    feeding statements which contribute to the calculation of this
@@ -817,12 +890,33 @@ propagate_necessity (bool aggressive)
           gphi *phi = as_a <gphi *> (stmt);
           size_t k;
 
-          for (k = 0; k < gimple_phi_num_args (stmt); k++)
-            {
-              tree arg = PHI_ARG_DEF (stmt, k);
-              if (TREE_CODE (arg) == SSA_NAME)
-                mark_operand_necessary (arg);
-            }
+          /* When scev cprop is enabled, computation of an induction variable
+             might not really be needed, if its final value at loop exit could
+             be directly calculated using initial value and loop niter, and no
+             interior evaluation around the induction variable participates in
+             other necessary statements in the loop.  So we only propagate
+             necessity through the ssa operands in the final value.
+             An example:
+
+               v = init;
+
+               for (i = 0; i < N; i++)
+                 v += step;
+
+               . = v;
+
+             The loop could be completely removed and replaced with a simple
+             evaluation: v = init + step * N.  Therefore, only "init", "step"
+             and "N" are actually necessary.  */
+          if (auto *entry = get_loop_closed_phi_scev_replacement (phi, true))
+            walk_tree (&entry->value, mark_expr_operand_necessary, NULL, NULL);
+          else
+            for (k = 0; k < gimple_phi_num_args (stmt); k++)
+              {
+                tree arg = PHI_ARG_DEF (stmt, k);
+                if (TREE_CODE (arg) == SSA_NAME)
+                  mark_operand_necessary (arg);
+              }
 
           /* For PHI operands it matters from where the control flow arrives
              to the BB.  Consider the following example:
@@ -1104,10 +1198,80 @@ propagate_necessity (bool aggressive)
         }
     }
 
-/* Remove dead PHI nodes from block BB.  */
+/* Try to fold the final value of ENTRY to a constant or ssa name.  Since
+   one entry may refer to an ssa name that is described by another entry,
+   this function recursively processes folding along the def/use relation.
+   If the value to process does not belong to any scev_cprop entry, it is
+   passed in VALUE_PTR and ENTRY is NULL.  Return true if the value was
+   simplified.  */
 
 static bool
-remove_dead_phis (basic_block bb)
+fold_scev_cprop_entry (scev_cprop_entry *entry, tree *value_ptr = NULL)
+{
+  if (entry)
+    {
+      if (entry->visited || CONSTANT_CLASS_P (entry->value))
+        return true;
+
+      entry->visited = true;
+      entry->value = unshare_expr (entry->value);
+      value_ptr = &entry->value;
+    }
+
+  tree value = *value_ptr;
+
+  if (TREE_CODE (value) == SSA_NAME)
+    {
+      scev_cprop_entry *def_entry;
+
+      if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (value)
+          && (def_entry = scev_cprop_map->get (value)))
+        {
+          fold_scev_cprop_entry (def_entry);
+
+          if (CONSTANT_CLASS_P (def_entry->value)
+              || TREE_CODE (def_entry->value) == SSA_NAME)
+            {
+              *value_ptr = def_entry->value;
+              return true;
+            }
+        }
+
+      return false;
+    }
+
+  bool changed = false;
+
+  if (EXPR_P (value))
+    {
+      for (int i = 0; i < TREE_OPERAND_LENGTH (value); i++)
+        {
+          tree &opnd = TREE_OPERAND (value, i);
+
+          if (opnd && !CONSTANT_CLASS_P (opnd))
+            changed |= fold_scev_cprop_entry (NULL, &opnd);
+        }
+    }
+
+  if (changed)
+    {
+      /* Only fold the value if any of its operands has been folded.  */
+      tree new_value = fold (value);
+
+      if (new_value == value)
+        changed = false;
+      else
+        *value_ptr = new_value;
+    }
+
+  return changed;
+}
+
+/* Remove dead PHI nodes from block BB, and try to replace loop closed PHIs
+   with scev final values.  */
+
+static bool
+replace_or_remove_phis (basic_block bb)
 {
   bool something_changed = false;
   gphi *phi;
@@ -1158,6 +1322,38 @@
           stats.removed_phis++;
           continue;
         }
+      else if (auto *entry = get_loop_closed_phi_scev_replacement (phi, false))
+        {
+          tree arg = gimple_phi_arg_def (phi, 0);
+
+          fold_scev_cprop_entry (entry);
+
+          if (scev_cprop_level > 1
+              || !bitmap_bit_p (processed, SSA_NAME_VERSION (arg))
+              || CONSTANT_CLASS_P (entry->value)
+              || TREE_CODE (entry->value) == SSA_NAME)
+            {
+              something_changed = true;
+              stats.sccp_replaced_phis++;
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                {
+                  fprintf (dump_file, "Replacing : ");
+                  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
+                  fprintf (dump_file, " with : ");
+                  print_generic_expr (dump_file, gimple_phi_result (phi));
+                  fprintf (dump_file, " = ");
+                  print_generic_expr (dump_file, entry->value);
+                  fprintf (dump_file, ";\n\n");
+                }
+
+              /* Advance iterator before the PHI is removed.  */
+              gsi_next (&gsi);
+              apply_scev_final_value_replacement (phi, entry->value,
+                                                  entry->rewrite_overflow,
+                                                  false);
+              continue;
+            }
+        }
 
       gsi_next (&gsi);
     }
@@ -1611,7 +1807,7 @@ eliminate_unnecessary_stmts (bool aggressive)
         }
 
       /* Remove dead PHI nodes.  */
-      something_changed |= remove_dead_phis (bb);
+      something_changed |= replace_or_remove_phis (bb);
     }
 
   /* First remove queued edges.  */
@@ -1755,6 +1951,11 @@ print_stats (void)
 
       fprintf (dump_file, "Removed %d of %d PHI nodes (%d%%)\n",
               stats.removed_phis, stats.total_phis, (int) percg);
+
+  if (stats.sccp_replaced_phis)
+    fprintf (dump_file, "Replaced %d PHI node%s by SCCP\n",
+             stats.sccp_replaced_phis,
+             stats.sccp_replaced_phis > 1 ? "s" : "");
 }
 
 /* Initialization for this pass.  Set up the used data structures.  */
@@ -1775,6 +1976,11 @@ tree_dce_init (bool aggressive)
   processed = sbitmap_alloc (num_ssa_names + 1);
   bitmap_clear (processed);
 
+  if (scev_cprop_level > 0)
+    scev_cprop_map = new hash_map<tree, scev_cprop_entry>();
+  else
+    scev_cprop_map = NULL;
+
   worklist.create (64);
   cfg_altered = false;
 }
@@ -1795,6 +2001,9 @@ tree_dce_done (bool aggressive)
 
   sbitmap_free (processed);
 
+  if (scev_cprop_map)
+    delete scev_cprop_map;
+
   worklist.release ();
 }
 
@@ -2005,23 +2214,29 @@ make_forwarders_with_degenerate_phis (function *fn)
    In aggressive mode, control dependences are taken into account, which
    results in more dead code elimination, but at the cost of some time.  */
 
-static unsigned int
-perform_tree_ssa_dce (bool aggressive)
+unsigned int
+perform_tree_ssa_dce (bool aggressive, int scev_cprop = 0)
 {
   bool something_changed = 0;
   unsigned todo = 0;
+  bool need_loop = aggressive || scev_cprop > 0;
 
   /* Preheaders are needed for SCEV to work.
     Simple lateches and recorded exits improve chances that loop will
    proved to be finite in testcases such as in loop-15.c and loop-24.c  */
   bool in_loop_pipeline = scev_initialized_p ();
-  if (aggressive && ! in_loop_pipeline)
+  if (need_loop && ! in_loop_pipeline)
    {
      loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      scev_initialize ();
    }
 
+  scev_cprop_level = scev_cprop;
+
+  if (scev_cprop > 0 && !loops_state_satisfies_p (LOOP_CLOSED_SSA))
+    rewrite_into_loop_closed_ssa (NULL, 0);
+
   if (aggressive)
     todo |= make_forwarders_with_degenerate_phis (cfun);
 
@@ -2044,12 +2259,6 @@ perform_tree_ssa_dce (bool aggressive)
 
   find_obviously_necessary_stmts (aggressive);
 
-  if (aggressive && ! in_loop_pipeline)
-    {
-      scev_finalize ();
-      loop_optimizer_finalize ();
-    }
-
   longest_chain = 0;
   total_chain = 0;
   nr_walks = 0;
@@ -2058,6 +2267,12 @@ perform_tree_ssa_dce (bool aggressive)
   propagate_necessity (aggressive);
   BITMAP_FREE (visited);
 
+  if (need_loop && ! in_loop_pipeline)
+    {
+      scev_finalize ();
+      loop_optimizer_finalize ();
+    }
+
   something_changed |= eliminate_unnecessary_stmts (aggressive);
   something_changed |= cfg_altered;
 
diff --git a/gcc/tree-ssa-dce.h b/gcc/tree-ssa-dce.h
index b0e92a58ea8..ef5b77ce36a 100644
--- a/gcc/tree-ssa-dce.h
+++ b/gcc/tree-ssa-dce.h
@@ -18,5 +18,6 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef TREE_SSA_DCE_H
 #define TREE_SSA_DCE_H
 
+extern unsigned perform_tree_ssa_dce (bool, int = 0);
 extern void simple_dce_from_worklist (bitmap, bitmap = nullptr);
 #endif
diff --git a/gcc/tree-ssa-loop.cc b/gcc/tree-ssa-loop.cc
index 1d9afd98015..04fd58c0dc5 100644
--- a/gcc/tree-ssa-loop.cc
+++ b/gcc/tree-ssa-loop.cc
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop-manip.h"
 #include "tree-ssa-loop-niter.h"
 #include "tree-ssa-loop.h"
+#include "tree-ssa-dce.h"
 #include "cfgloop.h"
 #include "tree-inline.h"
 #include "tree-scalar-evolution.h"
@@ -402,14 +403,9 @@ public:
 unsigned
 pass_scev_cprop::execute (function *)
 {
-  bool any = false;
-
   /* Perform final value replacement in loops, in case the replacement
      expressions are cheap.  */
-  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
-    any |= final_value_replacement_loop (loop);
-
-  return any ? TODO_cleanup_cfg | TODO_update_ssa_only_virtuals : 0;
+  return perform_tree_ssa_dce (false, flag_tree_scev_cprop);
 }
 
 } // anon namespace
-- 
2.17.1