> > > > Am 16.11.2024 um 14:08 schrieb Jan Hubicka <hubi...@ucw.cz>: > > > > Ignore conditions guarding __builtin_unreachable in inliner metrics > > > > This patch extends my last year's attempt to make the inliner metric ignore > > conditionals guarding __builtin_unreachable. Compared to the previous patch, > > this > > one implements a "mini-dce" in ipa-fnsummary to avoid accounting all > > statements > > that are only used to determine conditionals guarding __builtin_unreachable. > > These will be removed later once value ranges are determined. > > > > While working on this, I noticed that we do have a lot of dead code while > > computing fnsummary for early inline. Those are only used to apply > > large-function growth, but it seems there is enough dead code to make this > > value kind of irrelevant. Also there seems to be quite a lot of const/pure > > calls that can be cheaply removed before we inline them. So I wonder if we > > want to run one DCE before early inlining. > > I would not have expected a ‚lot‘ of dead const function calls. By the same > argument we should rather run CCP before inlining as that tends to prune most > dead code early? It was a bit more common than I would have expected. Originally I was looking for a bug in the mark minipass, but in the IPA fnsummary pass this does not happen. I plan to do some stats.
It may be interesting to gather some stats if getting rid of dead calls pre-inlining has a chance to pay back in saved effort on inlining. If we did a ccp/dce combo, we could also enable better early inlining decisions using ipa-predicates. I did some tests on this when we moved the inliner to SSA form, but that was quite a different situation than today. Honza > > Richard > > > Bootstrapped/regtested x86_64-linux, committed. > > > > gcc/ChangeLog: > > > > PR tree-optimization/109442 > > * ipa-fnsummary.cc (builtin_unreachable_bb_p): New function. > > (guards_builtin_unreachable): New function. > > (STMT_NECESSARY): New macro. > > (mark_stmt_necessary): New function. > > (mark_operand_necessary): New function. > > (find_necessary_statements): New function. > > (analyze_function_body): Use it. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.dg/ipa/fnsummary-1.c: New test. > > > > diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc > > index e921cd495f6..87e08dad846 100644 > > --- a/gcc/ipa-fnsummary.cc > > +++ b/gcc/ipa-fnsummary.cc > > @@ -2674,6 +2674,169 @@ points_to_possible_sra_candidate_p (tree t) > > return false; > > } > > > > +/* Return true if BB only calls builtin_unreachable. > > + We skip empty basic blocks, debug statements, clobbers and predicts. > > + CACHE is used to memoize already analyzed blocks. 
*/ > > + > > +static bool > > +builtin_unreachable_bb_p (basic_block bb, vec<unsigned char> &cache) > > +{ > > + if (cache[bb->index]) > > + return cache[bb->index] - 1; > > + gimple_stmt_iterator si; > > + auto_vec <basic_block, 4> visited_bbs; > > + bool ret = false; > > + while (true) > > + { > > + bool empty_bb = true; > > + visited_bbs.safe_push (bb); > > + cache[bb->index] = 3; > > + for (si = gsi_start_nondebug_bb (bb); > > + !gsi_end_p (si) && empty_bb; > > + gsi_next_nondebug (&si)) > > + { > > + if (gimple_code (gsi_stmt (si)) != GIMPLE_PREDICT > > + && !gimple_clobber_p (gsi_stmt (si)) > > + && !gimple_nop_p (gsi_stmt (si))) > > + { > > + empty_bb = false; > > + break; > > + } > > + } > > + if (!empty_bb) > > + break; > > + else > > + bb = single_succ_edge (bb)->dest; > > + if (cache[bb->index]) > > + { > > + ret = cache[bb->index] == 3 ? false : cache[bb->index] - 1; > > + goto done; > > + } > > + } > > + if (gimple_call_builtin_p (gsi_stmt (si), BUILT_IN_UNREACHABLE) > > + || gimple_call_builtin_p (gsi_stmt (si), BUILT_IN_UNREACHABLE_TRAP)) > > + ret = true; > > +done: > > + for (basic_block vbb:visited_bbs) > > + cache[vbb->index] = (unsigned char)ret + 1; > > + return ret; > > +} > > + > > +static bool > > +guards_builtin_unreachable (basic_block bb, vec<unsigned char> &cache) > > +{ > > + edge_iterator ei; > > + edge e; > > + FOR_EACH_EDGE (e, ei, bb->succs) > > + if (builtin_unreachable_bb_p (e->dest, cache)) > > + { > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + fprintf (dump_file, > > + "BB %i ends with conditional guarding __builtin_unreachable;" > > + " conditinal is unnecesary\n", bb->index); > > + return true; > > + } > > + return false; > > +} > > + > > +#define STMT_NECESSARY GF_PLF_1 > > + > > +/* If STMT is not already marked necessary, mark it, and add it to the > > + worklist if ADD_TO_WORKLIST is true. 
*/ > > + > > +static inline void > > +mark_stmt_necessary (gimple *stmt, auto_vec<gimple *> &worklist) > > +{ > > + gcc_assert (stmt); > > + > > + if (gimple_plf (stmt, STMT_NECESSARY)) > > + return; > > + > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "Marking useful stmt: "); > > + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); > > + fprintf (dump_file, "\n"); > > + } > > + > > + gimple_set_plf (stmt, STMT_NECESSARY, true); > > + worklist.safe_push (stmt); > > +} > > + > > +/* Mark the statement defining operand OP as necessary. */ > > + > > +static inline void > > +mark_operand_necessary (tree op, auto_vec<gimple *> &worklist) > > +{ > > + gimple *stmt = SSA_NAME_DEF_STMT (op); > > + if (gimple_nop_p (stmt)) > > + return; > > + mark_stmt_necessary (stmt, worklist); > > +} > > + > > +/* Mark all statements that will remain in the body after optimizing out > > + conditionals guarding __builtin_unreachable which we keep to preserve > > + value ranges. */ > > + > > +static void > > +find_necessary_statements (struct cgraph_node *node) > > +{ > > + struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); > > + auto_vec<unsigned char, 10> cache; > > + basic_block bb; > > + auto_vec<gimple *> worklist; > > + > > + cache.safe_grow_cleared (last_basic_block_for_fn (cfun)); > > + /* Mark all obviously necessary statements. 
*/ > > + FOR_EACH_BB_FN (bb, my_function) > > + { > > + for (gimple_stmt_iterator gsi = gsi_start_phis (bb); > > + !gsi_end_p (gsi); gsi_next (&gsi)) > > + gimple_set_plf (gsi_stmt (gsi), STMT_NECESSARY, false); > > + > > + for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); > > + gsi_next_nondebug (&bsi)) > > + { > > + gimple *stmt = gsi_stmt (bsi); > > + > > + gimple_set_plf (stmt, STMT_NECESSARY, false); > > + if (gimple_has_side_effects (stmt) > > + || (is_ctrl_stmt (stmt) > > + && (gimple_code (stmt) != GIMPLE_COND > > + || !guards_builtin_unreachable (bb, cache))) > > + || gimple_store_p (stmt)) > > + mark_stmt_necessary (stmt, worklist); > > + } > > + } > > + while (worklist.length () > 0) > > + { > > + gimple *stmt = worklist.pop (); > > + > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, "processing: "); > > + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); > > + fprintf (dump_file, "\n"); > > + } > > + if (gimple_code (stmt) == GIMPLE_PHI) > > + for (unsigned int k = 0; k < gimple_phi_num_args (stmt); k++) > > + { > > + tree arg = PHI_ARG_DEF (stmt, k); > > + > > + if (TREE_CODE (arg) == SSA_NAME) > > + mark_operand_necessary (arg, worklist); > > + } > > + else > > + { > > + ssa_op_iter iter; > > + tree use; > > + > > + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) > > + mark_operand_necessary (use, worklist); > > + } > > + } > > +} > > + > > /* Analyze function body for NODE. > > EARLY indicates run from early optimization pipeline. 
*/ > > > > @@ -2718,7 +2881,7 @@ analyze_function_body (struct cgraph_node *node, bool > > early) > > calculate_dominance_info (CDI_DOMINATORS); > > calculate_dominance_info (CDI_POST_DOMINATORS); > > if (!early) > > - loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); > > + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); > > else > > { > > ipa_check_create_node_params (); > > @@ -2765,6 +2928,7 @@ analyze_function_body (struct cgraph_node *node, bool > > early) > > > > if (fbi.info) > > compute_bb_predicates (&fbi, node, info, params_summary); > > + find_necessary_statements (node); > > const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; > > order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); > > nblocks = pre_and_rev_post_order_compute (NULL, order, false); > > @@ -2830,6 +2994,21 @@ analyze_function_body (struct cgraph_node *node, > > bool early) > > !gsi_end_p (bsi); gsi_next_nondebug (&bsi)) > > { > > gimple *stmt = gsi_stmt (bsi); > > + if (!gimple_plf (stmt, STMT_NECESSARY)) > > + { > > + if (dump_file && (dump_flags & TDF_DETAILS)) > > + { > > + fprintf (dump_file, " skipping unnecesary stmt "); > > + print_gimple_stmt (dump_file, stmt, 0); > > + } > > + /* TODO: const calls used only to produce values for > > + builtion_unreachable guards should not be accounted. However > > + we still want to inline them and this does does not work well > > + with the cost model. For now account them as usual. 
*/ > > + if (!is_gimple_call (stmt) > > + || gimple_call_internal_p (stmt)) > > + continue; > > + } > > int this_size = estimate_num_insns (stmt, &eni_size_weights); > > int this_time = estimate_num_insns (stmt, &eni_time_weights); > > int prob; > > diff --git a/gcc/testsuite/gcc.dg/ipa/fnsummary-1.c > > b/gcc/testsuite/gcc.dg/ipa/fnsummary-1.c > > new file mode 100644 > > index 00000000000..abc9dcfee38 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/ipa/fnsummary-1.c > > @@ -0,0 +1,9 @@ > > +/* { dg-options "-O2 -fdump-ipa-fnsummary-details" } */ > > +int > > +test(int a, int b) > > +{ > > + if (a / b > 10) > > + __builtin_unreachable (); > > +} > > +/* { dg-final { scan-ipa-dump "ends with conditional guarding > > __builtin_unreachable" "fnsummary" } } */ > > +/* { dg-final { scan-ipa-dump-times "skipping unnecesary stmt" 2 > > "fnsummary" } } */