> +/* Knob to control hot-caller heuristic. 0 means it is turned off, 1 means
> + it is always applied, and 2 means it is applied only if the footprint is
> + smaller than PARAM_HOT_CALLER_CODESIZE_THRESHOLD. */
> DEFPARAM (PARAM_INLINE_HOT_CALLER,
> "inline-hot-caller",
> "Consider cold callsites for inlining if caller contains hot code",
> + 2, 0, 2)
> +
> +/* The maximum code size estimate under which hot caller heuristic is
> + applied. */
> +DEFPARAM(PARAM_HOT_CALLER_CODESIZE_THRESHOLD,
> + "hot-caller-codesize-threshold",
> + "Maximum profile-based code size footprint estimate for "
> + "hot caller heuristic ",
> + 10000, 0, 0)
Out of curiosity, how sensitive is performance to the value of this
parameter? I.e. is there a clear cutoff for the codes that benefit
from disabling this inlining vs those that benefit from enabling it?
Also, have you tried spec2006? I remember that the codesize of the gcc
benchmark was above the larger 15000 threshold I use for tuning down
unrolling/peeling, and I needed to refine my heuristics to identify
profitable loops to unroll/peel even in the case of large codesize.
I'm not sure if there are more benchmarks that will be above the
smaller 10K threshold.
> +
> +DEFPARAM (PARAM_INLINE_USEFUL_COLD_CALLEE,
> + "inline-useful-cold-callee",
> + "Consider cold callsites for inlining if caller contains hot code",
> 1, 0, 1)
The description of this param is wrong (it is the same as the
description of PARAM_INLINE_HOT_CALLER). It should probably be
something like
"Only consider cold callsites for inlining if analysis finds
optimization opportunities"
>
> /* Limit of iterations of early inliner. This basically bounds number of
> Index: gcc/ipa-inline.c
> ===================================================================
> --- gcc/ipa-inline.c (revision 201768)
> +++ gcc/ipa-inline.c (working copy)
> @@ -528,12 +528,60 @@ big_speedup_p (struct cgraph_edge *e)
> return false;
> }
>
> +/* Returns true if callee of edge E is considered useful to inline
> + even if it is cold. A callee is considered useful if there is at
> + least one argument of pointer type that is not a pass-through. */
Can you expand this comment a bit to add why such arguments indicate
useful inlining?
Thanks,
Teresa
> +
> +static inline bool
> +useful_cold_callee (struct cgraph_edge *e)
> +{
> + gimple call = e->call_stmt;
> + int n, arg_num = gimple_call_num_args (call);
> + struct ipa_edge_args *args = IPA_EDGE_REF (e);
> +
> + for (n = 0; n < arg_num; n++)
> + {
> + tree arg = gimple_call_arg (call, n);
> + if (POINTER_TYPE_P (TREE_TYPE (arg)))
> + {
> + struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, n);
> + if (jfunc->type != IPA_JF_PASS_THROUGH)
> + return true;
> + }
> + }
> + return false;
> +}
> +
> +/* Returns true if hot caller heuristic should be used. */
> +
> +static inline bool
> +enable_hot_caller_heuristic (void)
> +{
> +
> + gcov_working_set_t *ws = NULL;
> + int size_threshold = PARAM_VALUE (PARAM_HOT_CALLER_CODESIZE_THRESHOLD);
> + int num_counters = 0;
> + int param_inline_hot_caller = PARAM_VALUE (PARAM_INLINE_HOT_CALLER);
> +
> + if (param_inline_hot_caller == 0)
> + return false;
> + else if (param_inline_hot_caller == 1)
> + return true;
> +
> + ws = find_working_set(PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
> + if (!ws)
> + return false;
> + num_counters = ws->num_counters;
> + return num_counters <= size_threshold;
> +
> +}
> /* Returns true if an edge or its caller are hot enough to
> be considered for inlining. */
>
> static bool
> edge_hot_enough_p (struct cgraph_edge *edge)
> {
> + static bool use_hot_caller_heuristic = enable_hot_caller_heuristic ();
> if (cgraph_maybe_hot_edge_p (edge))
> return true;
>
> @@ -543,9 +591,17 @@ edge_hot_enough_p (struct cgraph_edge *edge)
> if (flag_auto_profile && edge->callee->count == 0
> && edge->callee->max_bb_count > 0)
> return false;
> - if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER)
> - && maybe_hot_count_p (NULL, edge->caller->max_bb_count))
> - return true;
> + if (use_hot_caller_heuristic)
> + {
> + struct cgraph_node *where = edge->caller;
> + if (maybe_hot_count_p (NULL, where->max_bb_count))
> + {
> + if (PARAM_VALUE (PARAM_INLINE_USEFUL_COLD_CALLEE))
> + return useful_cold_callee (edge);
> + else
> + return true;
> + }
> + }
> return false;
> }
On Tue, Aug 20, 2013 at 12:26 PM, Easwaran Raman <[email protected]> wrote:
> The current hot caller heuristic simply promotes edges whose caller is
> hot. This patch does the following:
> * Turn it off for applications with large footprint since the size
> increase hurts them
> * Be more selective by considering arguments to callee when the
> heuristic is enabled.
>
> This performs well on internal benchmarks. Ok for google/4_8 branch if
> all tests pass?
>
> - Easwaran
--
Teresa Johnson | Software Engineer | [email protected] | 408-460-2413