Hi, this patch makes polymorphic call targets cache more effective. With more aggresive speculation the code now run into a problem that it have many different speculative lists. Those are typically sort in number of calls speculatively considered likely but they do have long list of unlikely targets, too.
This patch reorganizes possible_polymorphic_call_targets to give speculative and non-speculative lists separately. This way the speuclative lists are many and short while non-speculative are few but long. The code still spends more time than I would like to - next resonable optimization to do is to kill the recursive type walks looking for BINFO. Instead I can preprocess ODR types and store table of offsets where BINFOs are located and types associated with them. This will also avoid the ugly details of C++ ABI. I would like to do this change after retiring get_binfo_at_offset from ipa-prop. The firefox WPA is now as follows: phase opt and generate : 77.55 (64%) usr 1.52 (16%) sys 79.06 (56%) wall 720726 kB (16%) ggc phase stream in : 36.84 (30%) usr 2.13 (22%) sys 38.97 (28%) wall 3650329 kB (83%) ggc phase stream out : 6.67 ( 6%) usr 6.12 (63%) sys 23.00 (16%) wall 0 kB ( 0%) ggc callgraph optimization : 0.87 ( 1%) usr 0.00 ( 0%) sys 0.87 ( 1%) wall 34 kB ( 0%) ggc ipa dead code removal : 9.56 ( 8%) usr 0.10 ( 1%) sys 9.79 ( 7%) wall 0 kB ( 0%) ggc ipa virtual call target : 8.11 ( 7%) usr 0.09 ( 1%) sys 8.09 ( 6%) wall 0 kB ( 0%) ggc ipa cp : 2.68 ( 2%) usr 0.18 ( 2%) sys 2.86 ( 2%) wall 234548 kB ( 5%) ggc ipa inlining heuristics : 35.32 (29%) usr 1.14 (12%) sys 36.46 (26%) wall 960324 kB (22%) ggc ipa lto decl in : 25.61 (21%) usr 1.32 (14%) sys 26.95 (19%) wall 2629518 kB (60%) ggc ipa lto decl out : 5.84 ( 5%) usr 0.31 ( 3%) sys 6.15 ( 4%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 1.48 ( 1%) usr 0.26 ( 3%) sys 1.73 ( 1%) wall 487517 kB (11%) ggc ipa lto decl merge : 3.03 ( 3%) usr 0.01 ( 0%) sys 3.04 ( 2%) wall 16412 kB ( 0%) ggc ipa lto cgraph merge : 2.84 ( 2%) usr 0.00 ( 0%) sys 2.84 ( 2%) wall 12531 kB ( 0%) ggc whopr wpa : 2.51 ( 2%) usr 0.00 ( 0%) sys 2.50 ( 2%) wall 1 kB ( 0%) ggc whopr partitioning : 8.78 ( 7%) usr 0.02 ( 0%) sys 8.81 ( 6%) wall 5082 kB ( 0%) ggc ipa reference : 4.95 ( 4%) usr 0.08 ( 1%) sys 5.03 ( 4%) wall 0 kB ( 0%) ggc ipa pure const : 5.64 ( 5%) usr 0.03 ( 0%) sys 5.66 ( 4%) wall 0 kB ( 0%) ggc TOTAL : 121.06 9.77 141.04 4372468 kB Compared to 4.9.1: Execution times (seconds) phase setup : 0.01 ( 0%) usr 0.01 ( 0%) sys 0.05 ( 0%) wall 1534 kB ( 0%) ggc phase opt and generate : 71.55 (61%) usr 1.76 (18%) sys 73.44 (48%) wall 822835 kB (18%) ggc phase stream in : 40.27 (34%) usr 2.59 (26%) sys 52.26 (34%) wall 3647578 kB (82%) ggc phase stream out : 5.83 ( 5%) usr 5.51 (56%) sys 28.17 (18%) wall 0 kB ( 0%) ggc garbage collection : 2.43 ( 2%) usr 0.00 ( 0%) sys 2.48 ( 2%) wall 0 kB ( 0%) ggc ipa dead code removal : 8.31 ( 7%) usr 0.29 ( 3%) sys 8.64 ( 6%) wall 87 kB ( 0%) ggc ipa virtual call target : 7.62 ( 6%) usr 0.05 ( 1%) sys 7.59 ( 5%) wall 0 kB ( 0%) ggc ipa cp : 2.42 ( 2%) usr 0.26 ( 3%) sys 2.75 ( 2%) wall 238617 kB ( 5%) ggc ipa inlining heuristics : 34.58 (29%) usr 1.02 (10%) sys 35.67 (23%) wall 980366 kB (22%) ggc ipa lto decl in : 28.95 (25%) usr 1.91 (19%) sys 39.90 (26%) wall 2755921 kB (62%) ggc ipa lto decl out : 5.22 ( 4%) usr 0.68 ( 7%) sys 5.90 ( 4%) wall 0 kB ( 0%) ggc ipa lto cgraph I/O : 1.27 ( 1%) usr 0.34 ( 3%) sys 1.81 ( 1%) wall 452790 kB (10%) ggc ipa lto decl merge : 3.12 ( 3%) usr 0.00 ( 0%) sys 3.12 ( 2%) wall 16413 kB ( 0%) ggc ipa lto cgraph merge : 2.98 ( 3%) usr 0.00 ( 0%) sys 2.99 ( 2%) wall 11878 kB ( 0%) ggc whopr wpa : 1.45 ( 1%) usr 0.00 ( 0%) sys 1.44 ( 1%) wall 2 kB ( 0%) ggc whopr partitioning : 6.34 ( 5%) usr 0.07 ( 1%) sys 6.41 ( 4%) wall 3860 kB ( 0%) ggc ipa reference : 5.11 ( 4%) usr 0.10 ( 1%) sys 5.20 ( 3%) wall 0 kB ( 0%) ggc ipa pure const : 5.31 ( 5%) usr 0.02 ( 0%) sys 5.34 ( 3%) wall 0 kB ( 0%) ggc TOTAL : 117.66 9.87 153.92 4471948 kB So we are back from regression land, but would be nice to see some actual improvements soon. Bootstrapped/regtested x86_64-linux, will commit it tomorrow. Honza * ipa-devirt.c (polymorphic_call_target_d): Add SPECULATIVE; reorder for better storage. (polymorphic_call_target_hasher::hash): Hash SPECULATIVE. (possible_polymorphic_call_targets): Instead of computing both speculative and non-speculative answers, do just one at a time. Replace NONSPECULATIVE_TARGETSP parameter with SPECULATIVE flag. (dump_targets): Break out from ... (dump_possible_polymorphic_call_targets): ... here; dump both speculative and non-speculative lists. (ipa_devirt): Update for new possible_polymorphic_call_targets API. * ipa-utils.h (possible_polymorphic_call_targets): Update. * testsuite/g++.dg/ipa/devirt-34.C: Update template. Index: ipa-devirt.c =================================================================== --- ipa-devirt.c (revision 215577) +++ ipa-devirt.c (working copy) @@ -1884,10 +1884,10 @@ struct polymorphic_call_target_d ipa_polymorphic_call_context context; odr_type type; vec <cgraph_node *> targets; - int speculative_targets; - bool complete; - int type_warning; tree decl_warning; + int type_warning; + bool complete; + bool speculative; }; /* Polymorphic call target cache helpers. */ @@ -1917,6 +1917,7 @@ polymorphic_call_target_hasher::hash (co hstate.merge_hash (TYPE_UID (odr_query->context.speculative_outer_type)); hstate.add_wide_int (odr_query->context.speculative_offset); } + hstate.add_flag (odr_query->speculative); hstate.add_flag (odr_query->context.maybe_in_construction); hstate.add_flag (odr_query->context.maybe_derived_type); hstate.add_flag (odr_query->context.speculative_maybe_derived_type); @@ -1931,6 +1932,7 @@ polymorphic_call_target_hasher::equal (c const compare_type *t2) { return (t1->type == t2->type && t1->otr_token == t2->otr_token + && t1->speculative == t2->speculative && t1->context.offset == t2->context.offset && t1->context.speculative_offset == t2->context.speculative_offset && t1->context.outer_type == t2->context.outer_type @@ -3667,10 +3669,8 @@ struct final_warning_record *final_warni in the target cache. If user needs to visit every target list just once, it can memoize them. - SPECULATION_TARGETS specify number of targets that are speculatively - likely. These include targets specified by the speculative part - of polymoprhic call context and also exclude all targets for classes - in construction. + If SPECULATIVE is set, the list will not contain targets that + are not speculatively taken. Returned vector is placed into cache. It is NOT caller's responsibility to free it. The vector can be freed on cgraph_remove_node call if @@ -3682,7 +3682,7 @@ possible_polymorphic_call_targets (tree ipa_polymorphic_call_context context, bool *completep, void **cache_token, - int *speculative_targetsp) + bool speculative) { static struct cgraph_node_hook_list *node_removal_hook_holder; vec <cgraph_node *> nodes = vNULL; @@ -3706,13 +3706,11 @@ possible_polymorphic_call_targets (tree *completep = context.invalid; if (cache_token) *cache_token = NULL; - if (speculative_targetsp) - *speculative_targetsp = 0; return nodes; } /* Do not bother to compute speculative info when user do not asks for it. */ - if (!speculative_targetsp || !context.speculative_outer_type) + if (!speculative || !context.speculative_outer_type) context.clear_speculation (); type = get_odr_type (otr_type, true); @@ -3730,8 +3728,6 @@ possible_polymorphic_call_targets (tree *completep = true; if (cache_token) *cache_token = NULL; - if (speculative_targetsp) - *speculative_targetsp = 0; return nodes; } gcc_assert (!context.invalid); @@ -3783,6 +3779,7 @@ possible_polymorphic_call_targets (tree /* Lookup cached answer. */ key.type = type; key.otr_token = otr_token; + key.speculative = speculative; key.context = context; slot = polymorphic_call_target_hash->find_slot (&key, INSERT); if (cache_token) @@ -3791,15 +3788,13 @@ possible_polymorphic_call_targets (tree { if (completep) *completep = (*slot)->complete; - if (speculative_targetsp) - *speculative_targetsp = (*slot)->speculative_targets; if ((*slot)->type_warning && final_warning_records) { final_warning_records->type_warnings[(*slot)->type_warning - 1].count++; final_warning_records->type_warnings[(*slot)->type_warning - 1].dyn_count += final_warning_records->dyn_count; } - if ((*slot)->decl_warning && final_warning_records) + if (!speculative && (*slot)->decl_warning && final_warning_records) { struct decl_warn_count *c = final_warning_records->decl_warnings.get ((*slot)->decl_warning); @@ -3819,7 +3814,7 @@ possible_polymorphic_call_targets (tree (*slot)->type = type; (*slot)->otr_token = otr_token; (*slot)->context = context; - (*slot)->speculative_targets = 0; + (*slot)->speculative = speculative; hash_set<tree> inserted; hash_set<tree> matched_vtables; @@ -3864,137 +3859,136 @@ possible_polymorphic_call_targets (tree &speculation_complete, bases_to_consider, false); - (*slot)->speculative_targets = nodes.length(); } - /* First see virtual method of type itself. */ - binfo = get_binfo_at_offset (TYPE_BINFO (outer_type->type), - context.offset, otr_type); - if (binfo) - target = gimple_get_virt_method_for_binfo (otr_token, binfo, - &can_refer); - else + if (!speculative || !nodes.length ()) { - gcc_assert (odr_violation_reported); - target = NULL; - } - - /* Destructors are never called through construction virtual tables, - because the type is always known. */ - if (target && DECL_CXX_DESTRUCTOR_P (target)) - context.maybe_in_construction = false; + /* First see virtual method of type itself. */ + binfo = get_binfo_at_offset (TYPE_BINFO (outer_type->type), + context.offset, otr_type); + if (binfo) + target = gimple_get_virt_method_for_binfo (otr_token, binfo, + &can_refer); + else + { + gcc_assert (odr_violation_reported); + target = NULL; + } - if (target) - { - /* In the case we get complete method, we don't need - to walk derivations. */ - if (DECL_FINAL_P (target)) - context.maybe_derived_type = false; - } + /* Destructors are never called through construction virtual tables, + because the type is always known. */ + if (target && DECL_CXX_DESTRUCTOR_P (target)) + context.maybe_in_construction = false; - /* If OUTER_TYPE is abstract, we know we are not seeing its instance. */ - if (type_possibly_instantiated_p (outer_type->type)) - maybe_record_node (nodes, target, &inserted, can_refer, &complete); - else - skipped = true; + if (target) + { + /* In the case we get complete method, we don't need + to walk derivations. */ + if (DECL_FINAL_P (target)) + context.maybe_derived_type = false; + } - if (binfo) - matched_vtables.add (BINFO_VTABLE (binfo)); + /* If OUTER_TYPE is abstract, we know we are not seeing its instance. */ + if (type_possibly_instantiated_p (outer_type->type)) + maybe_record_node (nodes, target, &inserted, can_refer, &complete); + else + skipped = true; - /* Next walk recursively all derived types. */ - if (context.maybe_derived_type) - { - for (i = 0; i < outer_type->derived_types.length(); i++) - possible_polymorphic_call_targets_1 (nodes, &inserted, - &matched_vtables, - otr_type, - outer_type->derived_types[i], - otr_token, outer_type->type, - context.offset, &complete, - bases_to_consider, - context.maybe_in_construction); + if (binfo) + matched_vtables.add (BINFO_VTABLE (binfo)); - if (!outer_type->all_derivations_known) + /* Next walk recursively all derived types. */ + if (context.maybe_derived_type) { - if (final_warning_records) + for (i = 0; i < outer_type->derived_types.length(); i++) + possible_polymorphic_call_targets_1 (nodes, &inserted, + &matched_vtables, + otr_type, + outer_type->derived_types[i], + otr_token, outer_type->type, + context.offset, &complete, + bases_to_consider, + context.maybe_in_construction); + + if (!outer_type->all_derivations_known) { - if (complete - && nodes.length () == 1 - && warn_suggest_final_types - && !outer_type->derived_types.length ()) - { - if (outer_type->id >= (int)final_warning_records->type_warnings.length ()) - final_warning_records->type_warnings.safe_grow_cleared - (odr_types.length ()); - final_warning_records->type_warnings[outer_type->id].count++; - final_warning_records->type_warnings[outer_type->id].dyn_count - += final_warning_records->dyn_count; - final_warning_records->type_warnings[outer_type->id].type - = outer_type->type; - (*slot)->type_warning = outer_type->id + 1; - } - if (complete - && warn_suggest_final_methods - && nodes.length () == 1 - && types_same_for_odr (DECL_CONTEXT (nodes[0]->decl), - outer_type->type)) + if (!speculative && final_warning_records) { - bool existed; - struct decl_warn_count &c = - final_warning_records->decl_warnings.get_or_insert - (nodes[0]->decl, &existed); - - if (existed) + if (complete + && nodes.length () == 1 + && warn_suggest_final_types + && !outer_type->derived_types.length ()) { - c.count++; - c.dyn_count += final_warning_records->dyn_count; + if (outer_type->id >= (int)final_warning_records->type_warnings.length ()) + final_warning_records->type_warnings.safe_grow_cleared + (odr_types.length ()); + final_warning_records->type_warnings[outer_type->id].count++; + final_warning_records->type_warnings[outer_type->id].dyn_count + += final_warning_records->dyn_count; + final_warning_records->type_warnings[outer_type->id].type + = outer_type->type; + (*slot)->type_warning = outer_type->id + 1; } - else + if (complete + && warn_suggest_final_methods + && nodes.length () == 1 + && types_same_for_odr (DECL_CONTEXT (nodes[0]->decl), + outer_type->type)) { - c.count = 1; - c.dyn_count = final_warning_records->dyn_count; - c.decl = nodes[0]->decl; + bool existed; + struct decl_warn_count &c = + final_warning_records->decl_warnings.get_or_insert + (nodes[0]->decl, &existed); + + if (existed) + { + c.count++; + c.dyn_count += final_warning_records->dyn_count; + } + else + { + c.count = 1; + c.dyn_count = final_warning_records->dyn_count; + c.decl = nodes[0]->decl; + } + (*slot)->decl_warning = nodes[0]->decl; } - (*slot)->decl_warning = nodes[0]->decl; } + complete = false; } - complete = false; } - } - - /* Finally walk bases, if asked to. */ - if (!(*slot)->speculative_targets) - (*slot)->speculative_targets = nodes.length(); - /* Destructors are never called through construction virtual tables, - because the type is always known. One of entries may be cxa_pure_virtual - so look to at least two of them. */ - if (context.maybe_in_construction) - for (i =0 ; i < MIN (nodes.length (), 2); i++) - if (DECL_CXX_DESTRUCTOR_P (nodes[i]->decl)) - context.maybe_in_construction = false; - if (context.maybe_in_construction) - { - if (type != outer_type - && (!skipped - || (context.maybe_derived_type - && !type_all_derivations_known_p (outer_type->type)))) - record_targets_from_bases (otr_type, otr_token, outer_type->type, - context.offset, nodes, &inserted, - &matched_vtables, &complete); - if (skipped) - maybe_record_node (nodes, target, &inserted, can_refer, &complete); - for (i = 0; i < bases_to_consider.length(); i++) - maybe_record_node (nodes, bases_to_consider[i], &inserted, can_refer, &complete); + if (!speculative) + { + /* Destructors are never called through construction virtual tables, + because the type is always known. One of entries may be cxa_pure_virtual + so look to at least two of them. */ + if (context.maybe_in_construction) + for (i =0 ; i < MIN (nodes.length (), 2); i++) + if (DECL_CXX_DESTRUCTOR_P (nodes[i]->decl)) + context.maybe_in_construction = false; + if (context.maybe_in_construction) + { + if (type != outer_type + && (!skipped + || (context.maybe_derived_type + && !type_all_derivations_known_p (outer_type->type)))) + record_targets_from_bases (otr_type, otr_token, outer_type->type, + context.offset, nodes, &inserted, + &matched_vtables, &complete); + if (skipped) + maybe_record_node (nodes, target, &inserted, can_refer, &complete); + for (i = 0; i < bases_to_consider.length(); i++) + maybe_record_node (nodes, bases_to_consider[i], &inserted, can_refer, &complete); + } + } } - bases_to_consider.release(); + bases_to_consider.release(); (*slot)->targets = nodes; (*slot)->complete = complete; if (completep) *completep = complete; - if (speculative_targetsp) - *speculative_targetsp = (*slot)->speculative_targets; timevar_pop (TV_IPA_VIRTUAL_CALL); return nodes; @@ -4008,6 +4002,29 @@ add_decl_warning (const tree &key ATTRIB return true; } +/* Dump target list TARGETS into FILE. */ + +static void +dump_targets (FILE *f, vec <cgraph_node *> targets) +{ + unsigned int i; + + for (i = 0; i < targets.length (); i++) + { + char *name = NULL; + if (in_lto_p) + name = cplus_demangle_v3 (targets[i]->asm_name (), 0); + fprintf (f, " %s/%i", name ? name : targets[i]->name (), targets[i]->order); + if (in_lto_p) + free (name); + if (!targets[i]->definition) + fprintf (f, " (no definition%s)", + DECL_DECLARED_INLINE_P (targets[i]->decl) + ? " inline" : ""); + } + fprintf (f, "\n"); +} + /* Dump all possible targets of a polymorphic call. */ void @@ -4019,14 +4036,13 @@ dump_possible_polymorphic_call_targets ( vec <cgraph_node *> targets; bool final; odr_type type = get_odr_type (TYPE_MAIN_VARIANT (otr_type), false); - unsigned int i; - int speculative; + unsigned int len; if (!type) return; targets = possible_polymorphic_call_targets (otr_type, otr_token, ctx, - &final, NULL, &speculative); + &final, NULL, false); fprintf (f, " Targets of polymorphic call of type %i:", type->id); print_generic_expr (f, type->type, TDF_SLIM); fprintf (f, " token %i\n", (int)otr_token); @@ -4039,23 +4055,19 @@ dump_possible_polymorphic_call_targets ( ctx.maybe_in_construction ? " (base types included)" : "", ctx.maybe_derived_type ? " (derived types included)" : "", ctx.speculative_maybe_derived_type ? " (speculative derived types included)" : ""); - for (i = 0; i < targets.length (); i++) + len = targets.length (); + dump_targets (f, targets); + + targets = possible_polymorphic_call_targets (otr_type, otr_token, + ctx, + &final, NULL, true); + gcc_assert (targets.length () <= len); + if (targets.length () != len) { - char *name = NULL; - if (i == (unsigned)speculative) - fprintf (f, "\n Targets that are not likely:\n" - " "); - if (in_lto_p) - name = cplus_demangle_v3 (targets[i]->asm_name (), 0); - fprintf (f, " %s/%i", name ? name : targets[i]->name (), targets[i]->order); - if (in_lto_p) - free (name); - if (!targets[i]->definition) - fprintf (f, " (no definition%s)", - DECL_DECLARED_INLINE_P (targets[i]->decl) - ? " inline" : ""); + fprintf (f, " Speculative targets:"); + dump_targets (f, targets); } - fprintf (f, "\n\n"); + fprintf (f, "\n"); } @@ -4241,16 +4253,19 @@ ipa_devirt (void) struct cgraph_node *likely_target = NULL; void *cache_token; bool final; - int speculative_targets; if (final_warning_records) final_warning_records->dyn_count = e->count; vec <cgraph_node *>targets = possible_polymorphic_call_targets - (e, &final, &cache_token, &speculative_targets); + (e, &final, &cache_token, true); unsigned int i; + /* Trigger warnings by calculating non-speculative targets. */ + if (warn_suggest_final_methods || warn_suggest_final_types) + possible_polymorphic_call_targets (e); + if (dump_file) dump_possible_polymorphic_call_targets (dump_file, e); @@ -4289,13 +4304,10 @@ ipa_devirt (void) { if (likely_target) { - if (i < (unsigned) speculative_targets) - { - likely_target = NULL; - if (dump_file) - fprintf (dump_file, "More than one likely target\n\n"); - nmultiple++; - } + likely_target = NULL; + if (dump_file) + fprintf (dump_file, "More than one likely target\n\n"); + nmultiple++; break; } likely_target = targets[i]; Index: ipa-utils.h =================================================================== --- ipa-utils.h (revision 215575) +++ ipa-utils.h (working copy) @@ -62,7 +62,7 @@ possible_polymorphic_call_targets (tree, ipa_polymorphic_call_context, bool *copletep = NULL, void **cache_token = NULL, - int *nonconstruction_targets = NULL); + bool speuclative = false); odr_type get_odr_type (tree, bool insert = false); bool possible_polymorphic_call_target_p (tree ref, gimple stmt, struct cgraph_node *n); void dump_possible_polymorphic_call_targets (FILE *, tree, HOST_WIDE_INT, @@ -92,7 +92,7 @@ inline vec <cgraph_node *> possible_polymorphic_call_targets (struct cgraph_edge *e, bool *completep = NULL, void **cache_token = NULL, - int *nonconstruction_targets = NULL) + bool speculative = false) { ipa_polymorphic_call_context context(e); @@ -100,7 +100,7 @@ possible_polymorphic_call_targets (struc e->indirect_info->otr_token, context, completep, cache_token, - nonconstruction_targets); + speculative); } /* Same as above but taking OBJ_TYPE_REF as an parameter. */ Index: testsuite/g++.dg/ipa/devirt-34.C =================================================================== --- testsuite/g++.dg/ipa/devirt-34.C (revision 215575) +++ testsuite/g++.dg/ipa/devirt-34.C (working copy) @@ -15,6 +15,6 @@ t(struct B *b) /* We should guess that the pointer of type B probably points to an instance of B or its derivates and exclude A::t from list of likely targets. */ -/* { dg-final { scan-ipa-dump "Targets that are not likely" "devirt" } } */ +/* { dg-final { scan-ipa-dump "Speculative targets" "devirt" } } */ /* { dg-final { scan-ipa-dump "1 speculatively devirtualized" "devirt" } } */ /* { dg-final { cleanup-ipa-dump "devirt" } } */