Hi,
this patch makes the FDO inliner more aggressive about inlining function
calls that are considered hot.  This is based on the observation that
INLINE_INSNS_AUTO is the most common reason for inlining not happening:
20.5% for Firefox (where 63.2% of calls are not inlinable because the body
is not available) and 66% for GCC.

With this patch the INLINE_HINT_known_hot hint is added to edges that were
determined to be hot by the profile and for which, moreover, there is at
least a 50% chance that the caller will invoke the call during its execution.

With this hint we now ignore both limits (INLINE_INSNS_AUTO and
INLINE_INSNS_SINGLE) - this is because the greedy algorithm driven by the
speed/size_cost metric should work pretty well here, but we may want to
revisit it (i.e. add INLINE_INSNS_FDO or so).  I am erring on the aggressive
side so that we can collect some data on when the profile is a win or a loss.

Bootstrapped/regtested on x86_64-linux, committed.

Honza

        * ipa-inline.h (INLINE_HINT_known_hot): New hint.
        * ipa-inline-analysis.c (dump_inline_hints): Dump it.
        (do_estimate_edge_time): Compute it.
        * ipa-inline.c (want_inline_small_function_p): Bypass
        INLINE_INSNS_AUTO/SINGLE limits for calls that are known
        to be hot.
Index: ipa-inline.h
===================================================================
--- ipa-inline.h        (revision 209489)
+++ ipa-inline.h        (working copy)
@@ -68,7 +68,9 @@ enum inline_hints_vals {
   INLINE_HINT_cross_module = 64,
   /* If array indexes of loads/stores become known there may be room for
      further optimization.  */
-  INLINE_HINT_array_index = 128
+  INLINE_HINT_array_index = 128,
+  /* We know that the callee is hot by profile.  */
+  INLINE_HINT_known_hot = 256
 };
 typedef int inline_hints;
 
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c       (revision 209489)
+++ ipa-inline-analysis.c       (working copy)
@@ -671,6 +671,11 @@ dump_inline_hints (FILE *f, inline_hints
       hints &= ~INLINE_HINT_array_index;
       fprintf (f, " array_index");
     }
+  if (hints & INLINE_HINT_known_hot)
+    {
+      hints &= ~INLINE_HINT_known_hot;
+      fprintf (f, " known_hot");
+    }
   gcc_assert (!hints);
 }
 
@@ -3666,6 +3671,17 @@ do_estimate_edge_time (struct cgraph_edg
                                &known_aggs);
   estimate_node_size_and_time (callee, clause, known_vals, known_binfos,
                               known_aggs, &size, &min_size, &time, &hints, 
es->param);
+
+  /* When we have profile feedback, we can quite safely identify hot
+     edges and for those we disable size limits.  Don't do that when
+     probability that caller will call the callee is low however, since it
+     may hurt optimization of the caller's hot path.  */
+  if (edge->count && cgraph_maybe_hot_edge_p (edge)
+      && (edge->count * 2
+          > (edge->caller->global.inlined_to
+            ? edge->caller->global.inlined_to->count : edge->caller->count)))
+    hints |= INLINE_HINT_known_hot;
+
   known_vals.release ();
   known_binfos.release ();
   known_aggs.release ();
Index: ipa-inline.c
===================================================================
--- ipa-inline.c        (revision 209522)
+++ ipa-inline.c        (working copy)
@@ -578,18 +578,21 @@ want_inline_small_function_p (struct cgr
      inline cnadidate.  At themoment we allow inline hints to
      promote non-inline function to inline and we increase
      MAX_INLINE_INSNS_SINGLE 16fold for inline functions.  */
-  else if (!DECL_DECLARED_INLINE_P (callee->decl)
+  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
+          && (!e->count || !cgraph_maybe_hot_edge_p (e)))
           && inline_summary (callee)->min_size - inline_edge_summary 
(e)->call_stmt_size
              > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
     {
       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
       want_inline = false;
     }
-  else if (DECL_DECLARED_INLINE_P (callee->decl)
+  else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
           && inline_summary (callee)->min_size - inline_edge_summary 
(e)->call_stmt_size
              > 16 * MAX_INLINE_INSNS_SINGLE)
     {
-      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
+      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
+                         ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
+                         : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
       want_inline = false;
     }
   else
@@ -606,6 +609,7 @@ want_inline_small_function_p (struct cgr
               && growth >= MAX_INLINE_INSNS_SINGLE
               && ((!big_speedup
                    && !(hints & (INLINE_HINT_indirect_call
+                                 | INLINE_HINT_known_hot
                                  | INLINE_HINT_loop_iterations
                                  | INLINE_HINT_array_index
                                  | INLINE_HINT_loop_stride)))
@@ -630,6 +634,7 @@ want_inline_small_function_p (struct cgr
         inlining given function is very profitable.  */
       else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !big_speedup
+              && !(hints & INLINE_HINT_known_hot)
               && growth >= ((hints & (INLINE_HINT_indirect_call
                                       | INLINE_HINT_loop_iterations
                                       | INLINE_HINT_array_index

Reply via email to