> > > > Am 16.11.2024 um 14:08 schrieb Jan Hubicka <hubi...@ucw.cz>: > > > > Ignore conditions guarding __builtin_unreachable in inliner metrics > > > > This patch extends my last year's attempt to make the inliner metric ignore > > conditionals guarding __builtin_unreachable. Compared to the previous patch, > > this > > one implements a "mini-dce" in ipa-fnsummary to avoid accounting all > > statements > > that are only used to determine conditionals guarding __builtin_unreachable. > > These will be removed later once value ranges are determined. > > > > While working on this, I noticed that we do have a lot of dead code while > > computing fnsummary for early inline. Those are only used to apply > > large-function growth, but it seems there is enough dead code to make this > > value kind of irrelevant. Also there seems to be quite a lot of const/pure > > calls that can be cheaply removed before we inline them. So I wonder if we > > want to run one DCE before early inlining. > > I would not have expected a ‚lot‘ of dead const function calls. By the same > argument we should rather run CCP before inlining as that tends to prune most > dead code early? It was a bit more common than I would have expected. Originally I was looking for a bug in the mark minipass, but in the IPA fnsummary pass this does not happen. I plan to do some stats.
It may be interesting to gather some stats if getting rid of dead calls pre-inlining has a chance to pay back in saved effort on inlining. If we did a ccp/dce combo, we could also enable better early inlining decisions using ipa-predicates. I did some tests on this when we moved the inliner to SSA form, but that was quite a different situation than today. Honza > > Richard > > > Bootstrapped/regtested x86_64-linux, committed. > > > > gcc/ChangeLog: > > > > PR tree-optimization/109442 > > * ipa-fnsummary.cc (builtin_unreachable_bb_p): New function. > > (guards_builtin_unreachable): New function. > > (STMT_NECESSARY): New macro. > > (mark_stmt_necessary): New function. > > (mark_operand_necessary): New function. > > (find_necessary_statements): New function. > > (analyze_function_body): Use it. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/ipa/fnsummary-1.c: New test. > > > > diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc > > index e921cd495f6..87e08dad846 100644 > > --- a/gcc/ipa-fnsummary.cc > > +++ b/gcc/ipa-fnsummary.cc > > @@ -2674,6 +2674,169 @@ points_to_possible_sra_candidate_p (tree t) > > return false; > > } > > > > +/* Return true if BB only calls builtin_unreachable. > > + We skip empty basic blocks, debug statements, clobbers and predicts. > > + CACHE is used to memoize already analyzed blocks. 
*/ > > + > > +static bool > > +builtin_unreachable_bb_p (basic_block bb, vec<unsigned char> &cache) > > +{ > > + if (cache[bb->index]) > > + return cache[bb->index] - 1; > > + gimple_stmt_iterator si; > > + auto_vec <basic_block, 4> visited_bbs; > > + bool ret = false; > > + while (true) > > + { > > + bool empty_bb = true; > > + visited_bbs.safe_push (bb); > > + cache[bb->index] = 3; > > + for (si = gsi_start_nondebug_bb (bb); > > + !gsi_end_p (si) && empty_bb; > > + gsi_next_nondebug (&si)) > > + { > > + if (gimple_code (gsi_stmt (si)) != GIMPLE_PREDICT > > + && !gimple_clobber_p (gsi_stmt (si)) > > + && !gimple_nop_p (gsi_stmt (si))) > > + { > > + empty_bb = false; > > + break; > > + } > > + } > > + if (!empty_bb) > > + break; > > + else > > + bb = single_succ_edge (bb)->dest; > > + if (cache[bb->index]) > > + { > > + ret = cache[bb->index] == 3 ? false : cache[bb->index] - 1; > > + goto done; > > + } > > + } > > + if (gimple_call_builtin_p (gsi_stmt (si), BUILT_IN_UNREACHABLE) > > + || gimple_call_builtin_p (gsi_stmt (si), BUILT_IN_UNREACHABLE_TRAP)) > > + ret = true; > > +done: > > + for (basic_block vbb:visited_bbs) > > + cache[vbb->index] = (unsigned char)ret + 1; > > + return ret; > > +} > > + > > +static bool > > +guards_builtin_unreachable (basic_block bb, vec<unsigned char> &cache) > > +{ > > + edge_iterator ei; > > + edge e; > > + FOR_EACH_EDGE (e, ei, bb->succs) > > + if (builtin_unreachable_bb_p (e->dest, cache)) > > + { > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + fprintf (dump_file, > > + "BB %i ends with conditional guarding __builtin_unreachable;" > > + " conditinal is unnecesary\n", bb->index); > > + return true; > > + } > > + return false; > > +} > > + > > +#define STMT_NECESSARY GF_PLF_1 > > + > > +/* If STMT is not already marked necessary, mark it, and add it to the > > + worklist if ADD_TO_WORKLIST is true. 
*/ > > + > > +static inline void > > +mark_stmt_necessary (gimple *stmt, auto_vec<gimple *> &worklist) > > +{ > > + gcc_assert (stmt); > > + > > + if (gimple_plf (stmt, STMT_NECESSARY)) > > + return; > > + > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "Marking useful stmt: "); > > + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); > > + fprintf (dump_file, "\n"); > > + } > > + > > + gimple_set_plf (stmt, STMT_NECESSARY, true); > > + worklist.safe_push (stmt); > > +} > > + > > +/* Mark the statement defining operand OP as necessary. */ > > + > > +static inline void > > +mark_operand_necessary (tree op, auto_vec<gimple *> &worklist) > > +{ > > + gimple *stmt = SSA_NAME_DEF_STMT (op); > > + if (gimple_nop_p (stmt)) > > + return; > > + mark_stmt_necessary (stmt, worklist); > > +} > > + > > +/* Mark all statements that will remain in the body after optimizing out > > + conditionals guarding __builtin_unreachable which we keep to preserve > > + value ranges. */ > > + > > +static void > > +find_necessary_statements (struct cgraph_node *node) > > +{ > > + struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); > > + auto_vec<unsigned char, 10> cache; > > + basic_block bb; > > + auto_vec<gimple *> worklist; > > + > > + cache.safe_grow_cleared (last_basic_block_for_fn (cfun)); > > + /* Mark all obviously necessary statements. 
*/ > > + FOR_EACH_BB_FN (bb, my_function) > > + { > > + for (gimple_stmt_iterator gsi = gsi_start_phis (bb); > > + !gsi_end_p (gsi); gsi_next (&gsi)) > > + gimple_set_plf (gsi_stmt (gsi), STMT_NECESSARY, false); > > + > > + for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); > > + gsi_next_nondebug (&bsi)) > > + { > > + gimple *stmt = gsi_stmt (bsi); > > + > > + gimple_set_plf (stmt, STMT_NECESSARY, false); > > + if (gimple_has_side_effects (stmt) > > + || (is_ctrl_stmt (stmt) > > + && (gimple_code (stmt) != GIMPLE_COND > > + || !guards_builtin_unreachable (bb, cache))) > > + || gimple_store_p (stmt)) > > + mark_stmt_necessary (stmt, worklist); > > + } > > + } > > + while (worklist.length () > 0) > > + { > > + gimple *stmt = worklist.pop (); > > + > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "processing: "); > > + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); > > + fprintf (dump_file, "\n"); > > + } > > + if (gimple_code (stmt) == GIMPLE_PHI) > > + for (unsigned int k = 0; k < gimple_phi_num_args (stmt); k++) > > + { > > + tree arg = PHI_ARG_DEF (stmt, k); > > + > > + if (TREE_CODE (arg) == SSA_NAME) > > + mark_operand_necessary (arg, worklist); > > + } > > + else > > + { > > + ssa_op_iter iter; > > + tree use; > > + > > + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) > > + mark_operand_necessary (use, worklist); > > + } > > + } > > +} > > + > > /* Analyze function body for NODE. > > EARLY indicates run from early optimization pipeline. 
*/ > > > > @@ -2718,7 +2881,7 @@ analyze_function_body (struct cgraph_node *node, bool > > early) > > calculate_dominance_info (CDI_DOMINATORS); > > calculate_dominance_info (CDI_POST_DOMINATORS); > > if (!early) > > - loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); > > + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); > > else > > { > > ipa_check_create_node_params (); > > @@ -2765,6 +2928,7 @@ analyze_function_body (struct cgraph_node *node, bool > > early) > > > > if (fbi.info) > > compute_bb_predicates (&fbi, node, info, params_summary); > > + find_necessary_statements (node); > > const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; > > order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); > > nblocks = pre_and_rev_post_order_compute (NULL, order, false); > > @@ -2830,6 +2994,21 @@ analyze_function_body (struct cgraph_node *node, > > bool early) > > !gsi_end_p (bsi); gsi_next_nondebug (&bsi)) > > { > > gimple *stmt = gsi_stmt (bsi); > > + if (!gimple_plf (stmt, STMT_NECESSARY)) > > + { > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, " skipping unnecesary stmt "); > > + print_gimple_stmt (dump_file, stmt, 0); > > + } > > + /* TODO: const calls used only to produce values for > > + builtion_unreachable guards should not be accounted. However > > + we still want to inline them and this does does not work well > > + with the cost model. For now account them as usual. 
*/ > > + if (!is_gimple_call (stmt) > > + || gimple_call_internal_p (stmt)) > > + continue; > > + } > > int this_size = estimate_num_insns (stmt, &eni_size_weights); > > int this_time = estimate_num_insns (stmt, &eni_time_weights); > > int prob; > > diff --git a/gcc/testsuite/gcc.dg/ipa/fnsummary-1.c > > b/gcc/testsuite/gcc.dg/ipa/fnsummary-1.c > > new file mode 100644 > > index 00000000000..abc9dcfee38 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/ipa/fnsummary-1.c > > @@ -0,0 +1,9 @@ > > +/* { dg-options "-O2 -fdump-ipa-fnsummary-details" } */ > > +int > > +test(int a, int b) > > +{ > > + if (a / b > 10) > > + __builtin_unreachable (); > > +} > > +/* { dg-final { scan-ipa-dump "ends with conditional guarding > > __builtin_unreachable" "fnsummary" } } */ > > +/* { dg-final { scan-ipa-dump-times "skipping unnecesary stmt" 2 > > "fnsummary" } } */