Hi, this patch should chase away the expensive thunk and alias walks from most of the analysis code. I think the only real use left is the local_p predicate, which needs to stay because i386 expects the local flag to match between caller and callee when expanding an assembler thunk. I at least optimized it, first by making the walk conditional so that it is done only for functions not already marked local, and then by reorganizing call_for_symbol_thunks_and_aliases to inspect aliases first (that is cheap) and only then work on thunks. Most likely this will find a non-local thunk/alias faster. The other cases were leftovers from the conversion of thunks from aliases to functions.
I also noticed a bug in ipa-profile: it does not disable all the transforms when !flag_ipa_profile is set via OPTIMIZATION_NODE, and I fixed it. Bootstrapped/regtested x86_64-linux, committed. I would be interested to know if call_for_symbol_thunks_and_aliases is now off your oprofiles (sorry, easier to type than perf-profiles). Honza * ipa-visibility.c (function_and_variable_visibility): Only check locality if node is not already local. * ipa-inline.c (want_inline_function_to_all_callers_p): Use call_for_symbol_and_aliases instead of call_for_symbol_thunks_and_aliases. (ipa_inline): Likewise. * cgraph.c (cgraph_node::call_for_symbol_thunks_and_aliases): First walk aliases. * ipa.c (symbol_table::remove_unreachable_nodes): Use call_for_symbol_and_aliases. * ipa-profile.c (ipa_propagate_frequency_data): Add function_symbol. (ipa_propagate_frequency_1): Use it; use opt_for_fn. (ipa_propagate_frequency): Update. (ipa_profile): Add opt_for_fn guards. Index: ipa-visibility.c =================================================================== --- ipa-visibility.c (revision 220741) +++ ipa-visibility.c (working copy) @@ -595,7 +595,8 @@ function_and_variable_visibility (bool w } FOR_EACH_DEFINED_FUNCTION (node) { - node->local.local |= node->local_p (); + if (!node->local.local) + node->local.local |= node->local_p (); /* If we know that function can not be overwritten by a different semantics and moreover its section can not be discarded, replace all direct calls Index: ipa-inline.c =================================================================== --- ipa-inline.c (revision 220741) +++ ipa-inline.c (working copy) @@ -975,14 +975,14 @@ want_inline_function_to_all_callers_p (s if (node->global.inlined_to) return false; /* Does it have callers? */ - if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true)) + if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true)) return false; /* Inlining into all callers would increase size? 
*/ if (estimate_growth (node) > 0) return false; /* All inlines must be possible. */ - if (node->call_for_symbol_thunks_and_aliases (check_callers, &has_hot_call, - true)) + if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call, + true)) return false; if (!cold && !has_hot_call) return false; @@ -2359,9 +2359,9 @@ ipa_inline (void) if (want_inline_function_to_all_callers_p (node, cold)) { int num_calls = 0; - node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls, - true); - while (node->call_for_symbol_thunks_and_aliases + node->call_for_symbol_and_aliases (sum_callers, &num_calls, + true); + while (node->call_for_symbol_and_aliases (inline_to_all_callers, &num_calls, true)) ; remove_functions = true; Index: cgraph.c =================================================================== --- cgraph.c (revision 220741) +++ cgraph.c (working copy) @@ -2191,6 +2191,16 @@ cgraph_node::call_for_symbol_thunks_and_ if (callback (this, data)) return true; + FOR_EACH_ALIAS (this, ref) + { + cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring); + if (include_overwritable + || alias->get_availability () > AVAIL_INTERPOSABLE) + if (alias->call_for_symbol_thunks_and_aliases (callback, data, + include_overwritable, + exclude_virtual_thunks)) + return true; + } for (e = callers; e; e = e->next_caller) if (e->caller->thunk.thunk_p && (include_overwritable @@ -2202,16 +2212,6 @@ cgraph_node::call_for_symbol_thunks_and_ exclude_virtual_thunks)) return true; - FOR_EACH_ALIAS (this, ref) - { - cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring); - if (include_overwritable - || alias->get_availability () > AVAIL_INTERPOSABLE) - if (alias->call_for_symbol_thunks_and_aliases (callback, data, - include_overwritable, - exclude_virtual_thunks)) - return true; - } return false; } Index: ipa.c =================================================================== --- ipa.c (revision 220741) +++ ipa.c (working copy) @@ -661,7 +661,7 @@ 
symbol_table::remove_unreachable_nodes ( if (node->address_taken && !node->used_from_other_partition) { - if (!node->call_for_symbol_thunks_and_aliases + if (!node->call_for_symbol_and_aliases (has_addr_references_p, NULL, true) && (!node->instrumentation_clone || !node->instrumented_version Index: ipa-profile.c =================================================================== --- ipa-profile.c (revision 220741) +++ ipa-profile.c (working copy) @@ -322,6 +322,7 @@ ipa_profile_read_summary (void) struct ipa_propagate_frequency_data { + cgraph_node *function_symbol; bool maybe_unlikely_executed; bool maybe_executed_once; bool only_called_at_startup; @@ -342,7 +343,7 @@ ipa_propagate_frequency_1 (struct cgraph || d->only_called_at_startup || d->only_called_at_exit); edge = edge->next_caller) { - if (edge->caller != node) + if (edge->caller != d->function_symbol) { d->only_called_at_startup &= edge->caller->only_called_at_startup; /* It makes sense to put main() together with the static constructors. @@ -358,7 +359,11 @@ ipa_propagate_frequency_1 (struct cgraph errors can make us to push function into unlikely section even when it is executed by the train run. Transfer the function only if all callers are unlikely executed. */ - if (profile_info && flag_branch_probabilities + if (profile_info + && opt_for_fn (d->function_symbol->decl, flag_branch_probabilities) + /* Thunks are not profiled. This is more or less implementation + bug. 
*/ + && !d->function_symbol->thunk.thunk_p && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED || (edge->caller->global.inlined_to && edge->caller->global.inlined_to->frequency @@ -418,7 +423,7 @@ contains_hot_call_p (struct cgraph_node bool ipa_propagate_frequency (struct cgraph_node *node) { - struct ipa_propagate_frequency_data d = {true, true, true, true}; + struct ipa_propagate_frequency_data d = {node, true, true, true, true}; bool changed = false; /* We can not propagate anything useful about externally visible functions @@ -432,8 +437,8 @@ ipa_propagate_frequency (struct cgraph_n if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Processing frequency %s\n", node->name ()); - node->call_for_symbol_thunks_and_aliases (ipa_propagate_frequency_1, &d, - true); + node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d, + true); if ((d.only_called_at_startup && !d.only_called_at_exit) && !node->only_called_at_startup) @@ -597,6 +602,9 @@ ipa_profile (void) { bool update = false; + if (!opt_for_fn (n->decl, flag_ipa_profile)) + continue; + for (e = n->indirect_calls; e; e = e->next_callee) { if (n->count) @@ -697,7 +705,9 @@ ipa_profile (void) order_pos = ipa_reverse_postorder (order); for (i = order_pos - 1; i >= 0; i--) { - if (order[i]->local.local && ipa_propagate_frequency (order[i])) + if (order[i]->local.local + && opt_for_fn (order[i]->decl, flag_ipa_profile) + && ipa_propagate_frequency (order[i])) { for (e = order[i]->callees; e; e = e->next_callee) if (e->callee->local.local && !e->callee->aux) @@ -714,7 +724,9 @@ ipa_profile (void) something_changed = false; for (i = order_pos - 1; i >= 0; i--) { - if (order[i]->aux && ipa_propagate_frequency (order[i])) + if (order[i]->aux + && opt_for_fn (order[i]->decl, flag_ipa_profile) + && ipa_propagate_frequency (order[i])) { for (e = order[i]->callees; e; e = e->next_callee) if (e->callee->local.local && !e->callee->aux)