Hi, this patch saves almost 1GB of GGC-reachable memory for Firefox. Currently we point to all streamed-in trees from the global in-decl state and from the function-specific in-decl states. The states are big, and the pointers they hold keep those trees reachable. This patch gets rid of the states we do not need.
We run a GGC collection just once during a usual WPA compilation. Currently it happens just after tree loading, which introduces very little garbage. This patch moves it to after unreachable code removal, when the freeing happens. Still, we do not succeed in reusing much memory between GGC and the heap, so peak memory use in TOP drops by only 300MB. It may be interesting to allow random access into the per-function in-decl states and read them only after decl merging (and possibly unreachable function removal). Bootstrapped/regtested x86_64-linux, OK? * lto-symtab.c (lto_cgraph_replace_node): Free decl_in_state. * cgraph.c (cgraph_release_function_body): Free decl_in_state. * lto-section-in.c (lto_free_function_in_decl_state): New function. (lto_free_function_in_decl_state_for_node): New function. * lto.c (read_cgraph_and_symbols): Remove ggc_collect; clear section node; add comment why we do not collect. Index: lto-symtab.c =================================================================== --- lto-symtab.c (revision 202047) +++ lto-symtab.c (working copy) @@ -80,6 +80,8 @@ lto_cgraph_replace_node (struct cgraph_n /* Redirect incomming references. */ ipa_clone_referring ((symtab_node)prevailing_node, &node->symbol.ref_list); + lto_free_function_in_decl_state_for_node ((symtab_node)node); + if (node->symbol.decl != prevailing_node->symbol.decl) cgraph_release_function_body (node); Index: cgraph.c =================================================================== --- cgraph.c (revision 202047) +++ cgraph.c (working copy) @@ -1663,6 +1663,8 @@ cgraph_release_function_body (struct cgr if (!node->used_as_abstract_origin && DECL_INITIAL (node->symbol.decl)) DECL_INITIAL (node->symbol.decl) = error_mark_node; release_function_body (node->symbol.decl); + if (node->symbol.lto_file_data) + lto_free_function_in_decl_state_for_node ((symtab_node) node); } /* Remove the node from cgraph. 
*/ Index: lto-section-in.c =================================================================== --- lto-section-in.c (revision 202047) +++ lto-section-in.c (working copy) @@ -414,6 +414,41 @@ lto_get_function_in_decl_state (struct l return slot? ((struct lto_in_decl_state*) *slot) : NULL; } +/* Free decl_states. */ + +void +lto_free_function_in_decl_state (struct lto_in_decl_state *state) +{ + int i; + for (i = 0; i < LTO_N_DECL_STREAMS; i++) + ggc_free (state->streams[i].trees); + ggc_free (state); +} + +/* Free decl_states associated with NODE. This makes it possible to further + release trees needed by the NODE's body. */ + +void +lto_free_function_in_decl_state_for_node (symtab_node node) +{ + struct lto_in_decl_state temp; + void **slot; + + if (!node->symbol.lto_file_data) + return; + + temp.fn_decl = node->symbol.decl; + slot = htab_find_slot (node->symbol.lto_file_data->function_decl_states, + &temp, NO_INSERT); + if (slot && *slot) + { + lto_free_function_in_decl_state ((struct lto_in_decl_state*) *slot); + htab_clear_slot (node->symbol.lto_file_data->function_decl_states, + slot); + } + node->symbol.lto_file_data = NULL; +} + /* Report read pass end of the section. */ Index: ipa.c =================================================================== --- ipa.c (revision 202047) +++ ipa.c (working copy) @@ -873,6 +873,17 @@ function_and_variable_visibility (bool w segfault though. 
*/ symtab_dissolve_same_comdat_group_list ((symtab_node) node); } + if (node->symbol.externally_visible + && DECL_COMDAT (node->symbol.decl) + && comdat_can_be_unshared_p ((symtab_node) node)) + { + if (dump_file + && DECL_VISIBILITY (node->symbol.decl) != VISIBILITY_HIDDEN) + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n", + cgraph_node_name (node), node->symbol.order); + DECL_VISIBILITY (node->symbol.decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (node->symbol.decl) = true; + } if (node->thunk.thunk_p && TREE_PUBLIC (node->symbol.decl)) @@ -980,6 +991,17 @@ function_and_variable_visibility (bool w symtab_dissolve_same_comdat_group_list ((symtab_node) vnode); vnode->symbol.resolution = LDPR_PREVAILING_DEF_IRONLY; } + if (vnode->symbol.externally_visible + && DECL_COMDAT (vnode->symbol.decl) + && comdat_can_be_unshared_p ((symtab_node) vnode)) + { + if (dump_file + && DECL_VISIBILITY (vnode->symbol.decl) == VISIBILITY_HIDDEN) + fprintf (dump_file, "Promoting visibility to hidden: %s/%i\n", + varpool_node_name (vnode), vnode->symbol.order); + DECL_VISIBILITY (vnode->symbol.decl) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (vnode->symbol.decl) = true; + } } if (dump_file) Index: lto/lto.c =================================================================== --- lto/lto.c (revision 202047) +++ lto/lto.c (working copy) @@ -3429,7 +3429,6 @@ read_cgraph_and_symbols (unsigned nfiles tree_scc_hash.dispose (); obstack_free (&tree_scc_hash_obstack, NULL); free_gimple_type_tables (); - ggc_collect (); /* Set the hooks so that all of the ipa passes can read in their data. */ lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); @@ -3484,7 +3483,6 @@ read_cgraph_and_symbols (unsigned nfiles } htab_delete (tree_with_vars); tree_with_vars = NULL; - ggc_collect (); timevar_pop (TV_IPA_LTO_DECL_MERGE); /* Each pass will set the appropriate timer. 
*/ @@ -3503,6 +3501,9 @@ read_cgraph_and_symbols (unsigned nfiles gcc_assert (all_file_decl_data[i]->symtab_node_encoder); lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); all_file_decl_data[i]->symtab_node_encoder = NULL; + lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); + all_file_decl_data[i]->global_decl_state = NULL; + all_file_decl_data[i]->current_decl_state = NULL; } /* Finally merge the cgraph according to the decl merging decisions. */ @@ -3513,7 +3514,12 @@ read_cgraph_and_symbols (unsigned nfiles dump_symtab (cgraph_dump_file); } lto_symtab_merge_symbols (); - ggc_collect (); + + /* Do not GGC collect here; streaming in should not produce garbage. + Be sure we first collect after merging symbols, setting up visibilities + and removing unreachable nodes. This will happen after whole program + visibility pass. This should release more memory back to the system + and possibly allow us to re-use it for heap. */ cgraph_state = CGRAPH_STATE_IPA_SSA; timevar_pop (TV_IPA_LTO_CGRAPH_MERGE); Index: lto-streamer.h =================================================================== --- lto-streamer.h (revision 202047) +++ lto-streamer.h (working copy) @@ -774,6 +774,8 @@ extern hashval_t lto_hash_in_decl_state extern int lto_eq_in_decl_state (const void *, const void *); extern struct lto_in_decl_state *lto_get_function_in_decl_state ( struct lto_file_decl_data *, tree); +extern void lto_free_function_in_decl_state (struct lto_in_decl_state *); +extern void lto_free_function_in_decl_state_for_node (symtab_node); extern void lto_section_overrun (struct lto_input_block *) ATTRIBUTE_NORETURN; extern void lto_value_range_error (const char *, HOST_WIDE_INT, HOST_WIDE_INT,