Hi, this patch makes the FDO inliner more aggressive about inlining function calls that are considered hot. This is based on the observation that INLINE_INSNS_AUTO is the most common reason for inlining not happening (20.5% for Firefox, where 63.2% of calls are not inlinable because the body is not available, and 66% for GCC).
With this patch, the INLINE_HINT_known_hot hint is added to edges that were determined to be hot by profile and for which, moreover, there is at least a 50% chance that the caller will invoke the call during its execution. With this hint we now ignore both limits - this is because the greedy algorithm driven by the speed/size_cost metric should work pretty well here, but we may want to revisit it (i.e. add INLINE_INSNS_FDO or so). I am erring on the aggressive side so that we collect some data on when the profile is a win or a loss. Bootstrapped/regtested x86_64-linux, committed. Honza * ipa-inline.h (INLINE_HINT_known_hot): New hint. * ipa-inline-analysis.c (dump_inline_hints): Dump it. (do_estimate_edge_time): Compute it. * ipa-inline.c (want_inline_small_function_p): Bypass INLINE_INSNS_AUTO/SINGLE limits for calls that are known to be hot. Index: ipa-inline.h =================================================================== --- ipa-inline.h (revision 209489) +++ ipa-inline.h (working copy) @@ -68,7 +68,9 @@ enum inline_hints_vals { INLINE_HINT_cross_module = 64, /* If array indexes of loads/stores become known there may be room for further optimization. */ - INLINE_HINT_array_index = 128 + INLINE_HINT_array_index = 128, + /* We know that the callee is hot by profile. 
*/ + INLINE_HINT_known_hot = 256 }; typedef int inline_hints; Index: ipa-inline-analysis.c =================================================================== --- ipa-inline-analysis.c (revision 209489) +++ ipa-inline-analysis.c (working copy) @@ -671,6 +671,11 @@ dump_inline_hints (FILE *f, inline_hints hints &= ~INLINE_HINT_array_index; fprintf (f, " array_index"); } + if (hints & INLINE_HINT_known_hot) + { + hints &= ~INLINE_HINT_known_hot; + fprintf (f, " known_hot"); + } gcc_assert (!hints); } @@ -3666,6 +3671,17 @@ do_estimate_edge_time (struct cgraph_edg &known_aggs); estimate_node_size_and_time (callee, clause, known_vals, known_binfos, known_aggs, &size, &min_size, &time, &hints, es->param); + + /* When we have profile feedback, we can quite safely identify hot + edges and for those we disable size limits. Don't do that when + probability that caller will call the callee is low however, since it + may hurt optimization of the caller's hot path. */ + if (edge->count && cgraph_maybe_hot_edge_p (edge) + && (edge->count * 2 + > (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to->count : edge->caller->count))) + hints |= INLINE_HINT_known_hot; + known_vals.release (); known_binfos.release (); known_aggs.release (); Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 209522) +++ ipa-inline.c (working copy) @@ -578,18 +578,21 @@ want_inline_small_function_p (struct cgr inline cnadidate. At themoment we allow inline hints to promote non-inline function to inline and we increase MAX_INLINE_INSNS_SINGLE 16fold for inline functions. 
*/ - else if (!DECL_DECLARED_INLINE_P (callee->decl) + else if ((!DECL_DECLARED_INLINE_P (callee->decl) + && (!e->count || !cgraph_maybe_hot_edge_p (e))) && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO)) { e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; want_inline = false; } - else if (DECL_DECLARED_INLINE_P (callee->decl) + else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count) && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size > 16 * MAX_INLINE_INSNS_SINGLE) { - e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; + e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) + ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT + : CIF_MAX_INLINE_INSNS_AUTO_LIMIT); want_inline = false; } else @@ -606,6 +609,7 @@ want_inline_small_function_p (struct cgr && growth >= MAX_INLINE_INSNS_SINGLE && ((!big_speedup && !(hints & (INLINE_HINT_indirect_call + | INLINE_HINT_known_hot | INLINE_HINT_loop_iterations | INLINE_HINT_array_index | INLINE_HINT_loop_stride))) @@ -630,6 +634,7 @@ want_inline_small_function_p (struct cgr inlining given function is very profitable. */ else if (!DECL_DECLARED_INLINE_P (callee->decl) && !big_speedup + && !(hints & INLINE_HINT_known_hot) && growth >= ((hints & (INLINE_HINT_indirect_call | INLINE_HINT_loop_iterations | INLINE_HINT_array_index