On Sun, 2024-11-17 at 19:15:04 +0000, Jan Hubicka wrote: > > I would suggest renaming produce_asm to produce_symbol_asm > and making produce_asm wrapper which passes fn=NULL and output_order=-1, > so we do not have odd parameters everywhere in streaming code. > > OK with this change. > Honza
Applied the suggested change. ------------------------ This patch adds remapping of node order for each LTO partition. The resulting order preserves the relative order inside the partition, but is independent of symbols outside it. So if an LTO partition contains an identical set of symbols, their remapped order will be stable between compilations. This stability is needed for Incremental LTO. gcc/ChangeLog: * ipa-devirt.cc (ipa_odr_summary_write): Remove unused argument. * ipa-fnsummary.cc (ipa_fn_summary_write): Likewise. * ipa-icf.cc (sem_item_optimizer::write_summary): Likewise. * ipa-modref.cc (modref_write): Likewise. * ipa-prop.cc (ipa_prop_write_jump_functions): Likewise. (ipcp_write_transformation_summaries): Likewise. * ipa-sra.cc (ipa_sra_write_summary): Likewise. * lto-cgraph.cc (lto_symtab_encoder_delete): Delete remap. (lto_output_node): Remap order. (lto_output_varpool_node): Likewise. (output_cgraph_opt_summary): Remove unused argument. * lto-streamer-out.cc (produce_symbol_asm): Renamed from produce_asm. Use remapped order. (produce_asm): New wrapper. (output_function): Propagate remapped order. (output_constructor): Likewise. (copy_function_or_variable): Likewise. (cmp_int): New. (create_order_remap): New. (lto_output): Create remap. Remap order. * lto-streamer.h (struct lto_symtab_encoder_d): Add order_remap hash_map. (produce_asm): Remove fn argument. 
--- gcc/ipa-devirt.cc | 2 +- gcc/ipa-fnsummary.cc | 2 +- gcc/ipa-icf.cc | 2 +- gcc/ipa-modref.cc | 4 +- gcc/ipa-prop.cc | 4 +- gcc/ipa-sra.cc | 2 +- gcc/lto-cgraph.cc | 10 +++-- gcc/lto-streamer-out.cc | 93 +++++++++++++++++++++++++++++++++++------ gcc/lto-streamer.h | 5 ++- 9 files changed, 99 insertions(+), 25 deletions(-) diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc index e88e9db781e..cdd520ba76b 100644 --- a/gcc/ipa-devirt.cc +++ b/gcc/ipa-devirt.cc @@ -4131,7 +4131,7 @@ ipa_odr_summary_write (void) odr_enum_map = NULL; } - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index 3f5e09960ef..c057536f551 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -5091,7 +5091,7 @@ ipa_fn_summary_write (void) } } streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); ipa_prop_write_jump_functions (); diff --git a/gcc/ipa-icf.cc b/gcc/ipa-icf.cc index 60152e60bc5..e9c5ae764f0 100644 --- a/gcc/ipa-icf.cc +++ b/gcc/ipa-icf.cc @@ -2216,7 +2216,7 @@ sem_item_optimizer::write_summary (void) } streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index 7449041c102..e68f434aa10 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -3746,7 +3746,7 @@ modref_write () { streamer_write_uhwi (ob, 0); streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); return; } @@ -3821,7 +3821,7 @@ modref_write () } } streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc index 9070a45f683..86044e392aa 100644 --- a/gcc/ipa-prop.cc +++ b/gcc/ipa-prop.cc @@ -5338,7 +5338,7 @@ ipa_prop_write_jump_functions (void) ipa_write_node_info 
(ob, node); } streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } @@ -5536,7 +5536,7 @@ ipcp_write_transformation_summaries (void) write_ipcp_transformation_info (ob, cnode, ts); } streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc index 04920f2aa8e..e6a75139eb0 100644 --- a/gcc/ipa-sra.cc +++ b/gcc/ipa-sra.cc @@ -2898,7 +2898,7 @@ ipa_sra_write_summary (void) isra_write_node_summary (ob, node); } streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index d1d63fd90ea..14275ed7c42 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -96,6 +96,8 @@ lto_symtab_encoder_delete (lto_symtab_encoder_t encoder) encoder->nodes.release (); if (encoder->map) delete encoder->map; + if (encoder->order_remap) + delete encoder->order_remap; free (encoder); } @@ -405,7 +407,8 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node, streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag, tag); - streamer_write_hwi_stream (ob->main_stream, node->order); + int output_order = *encoder->order_remap->get (node->order); + streamer_write_hwi_stream (ob->main_stream, output_order); /* In WPA mode, we only output part of the call-graph. Also, we fake cgraph node attributes. There are two cases that we care. 
@@ -602,7 +605,8 @@ lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node *node, streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag, LTO_symtab_variable); - streamer_write_hwi_stream (ob->main_stream, node->order); + int output_order = *encoder->order_remap->get (node->order); + streamer_write_hwi_stream (ob->main_stream, output_order); lto_output_var_decl_ref (ob->decl_state, ob->main_stream, node->decl); bp = bitpack_create (ob->main_stream); bp_pack_value (&bp, node->externally_visible, 1); @@ -2111,7 +2115,7 @@ output_cgraph_opt_summary (void) output_node_opt_summary (ob, cnode, encoder); } } - produce_asm (ob, NULL); + produce_asm (ob); destroy_output_block (ob); } diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc index 6dc30daf76e..27911242ad7 100644 --- a/gcc/lto-streamer-out.cc +++ b/gcc/lto-streamer-out.cc @@ -2202,12 +2202,11 @@ output_cfg (struct output_block *ob, struct function *fn) ob->main_stream = tmp_stream; } - /* Create the header in the file using OB. If the section type is for a function, set FN to the decl for that function. */ void -produce_asm (struct output_block *ob, tree fn) +produce_symbol_asm (struct output_block *ob, tree fn, int output_order) { enum lto_section_type section_type = ob->section_type; struct lto_function_header header; @@ -2217,8 +2216,7 @@ produce_asm (struct output_block *ob, tree fn) { const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fn)); section_name = lto_get_section_name (section_type, name, - symtab_node::get (fn)->order, - NULL); + output_order, NULL); } else section_name = lto_get_section_name (section_type, NULL, 0, NULL); @@ -2245,6 +2243,14 @@ produce_asm (struct output_block *ob, tree fn) lto_end_section (); } +/* Wrapper for unused arguments. */ + +void +produce_asm (struct output_block *ob) +{ + produce_symbol_asm (ob, NULL, -1); +} + /* Output the base body of struct function FN using output block OB. 
*/ @@ -2402,7 +2408,7 @@ streamer_write_chain (struct output_block *ob, tree t, bool ref_p) /* Output the body of function NODE->DECL. */ static void -output_function (struct cgraph_node *node) +output_function (struct cgraph_node *node, int output_order) { tree function; struct function *fn; @@ -2479,7 +2485,7 @@ output_function (struct cgraph_node *node) streamer_write_uhwi (ob, 0); /* Create a section to hold the pickled output of this function. */ - produce_asm (ob, function); + produce_symbol_asm (ob, function, output_order); destroy_output_block (ob); if (streamer_dump_file) @@ -2490,7 +2496,7 @@ output_function (struct cgraph_node *node) /* Output the body of function NODE->DECL. */ static void -output_constructor (struct varpool_node *node) +output_constructor (struct varpool_node *node, int output_order) { tree var = node->decl; struct output_block *ob; @@ -2512,7 +2518,7 @@ output_constructor (struct varpool_node *node) stream_write_tree (ob, DECL_INITIAL (var), true); /* Create a section to hold the pickled output of this function. */ - produce_asm (ob, var); + produce_symbol_asm (ob, var, output_order); destroy_output_block (ob); if (streamer_dump_file) @@ -2573,7 +2579,7 @@ lto_output_toplevel_asms (void) /* Copy the function body or variable constructor of NODE without deserializing. 
*/ static void -copy_function_or_variable (struct symtab_node *node) +copy_function_or_variable (struct symtab_node *node, int output_order) { tree function = node->decl; struct lto_file_decl_data *file_data = node->lto_file_data; @@ -2581,7 +2587,7 @@ copy_function_or_variable (struct symtab_node *node) size_t len; const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function)); char *section_name = - lto_get_section_name (LTO_section_function_body, name, node->order, NULL); + lto_get_section_name (LTO_section_function_body, name, output_order, NULL); size_t i, j; struct lto_in_decl_state *in_state; struct lto_out_decl_state *out_state = lto_get_out_decl_state (); @@ -2726,6 +2732,63 @@ cmp_symbol_files (const void *pn1, const void *pn2, void *id_map_) return n1->order - n2->order; } +/* Compare ints, callback for qsort. */ + +static int +cmp_int (const void *a, const void *b) +{ + int ia = *(int const*) a; + int ib = *(int const*) b; + return ia - ib; +} + +/* Create order mapping independent of symbols outside of the partition. + Results in stable order values for incremental LTO. + + Remapping is not done in place, because symbols can be used + by multiple partitions. 
*/ + +static void +create_order_remap (lto_symtab_encoder_t encoder) +{ + auto_vec<int> orders; + unsigned i; + struct asm_node* anode; + encoder->order_remap = new hash_map<int_hash<int, -1, -2>, int>; + unsigned n_nodes = lto_symtab_encoder_size (encoder); + + for (i = 0; i < n_nodes; i++) + orders.safe_push (lto_symtab_encoder_deref (encoder, i)->order); + + if (!asm_nodes_output) + { + for (anode = symtab->first_asm_symbol (); anode; anode = anode->next) + orders.safe_push (anode->order); + } + + orders.qsort (cmp_int); + int ord = 0; + int last_order = -1; + for (i = 0; i < orders.length (); i++) + { + int order = orders[i]; + if (order != last_order) + { + last_order = order; + encoder->order_remap->put (order, ord); + ord++; + } + } + + /* Asm nodes are currently always output only into first partition. + We can remap already here. */ + if (!asm_nodes_output) + { + for (anode = symtab->first_asm_symbol (); anode; anode = anode->next) + anode->order = *encoder->order_remap->get (anode->order); + } +} + /* Main entry point from the pass manager. */ void @@ -2738,6 +2801,8 @@ lto_output (void) lto_symtab_encoder_t encoder = lto_get_out_decl_state ()->symtab_node_encoder; auto_vec<symtab_node *> symbols_to_copy; + create_order_remap (encoder); + prune_offload_funcs (); if (flag_checking) @@ -2800,6 +2865,8 @@ lto_output (void) cgraph_node *cnode; varpool_node *vnode; + int output_order = *encoder->order_remap->get (snode->order); + if (flag_checking) gcc_assert (bitmap_set_bit (output, DECL_UID (snode->decl))); @@ -2814,14 +2881,14 @@ lto_output (void) at WPA time. 
*/ || DECL_ARGUMENTS (cnode->decl) || cnode->declare_variant_alt)) - output_function (cnode); + output_function (cnode, output_order); else if ((vnode = dyn_cast <varpool_node *> (snode)) && (DECL_INITIAL (vnode->decl) != error_mark_node || (!flag_wpa && flag_incremental_link != INCREMENTAL_LINK_LTO))) - output_constructor (vnode); + output_constructor (vnode, output_order); else - copy_function_or_variable (snode); + copy_function_or_variable (snode, output_order); gcc_assert (lto_get_out_decl_state () == decl_state); lto_pop_out_decl_state (); lto_record_function_out_decl_state (snode->decl, decl_state); diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index 1c416a7a1b9..0699c6928d4 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -470,6 +470,9 @@ struct lto_symtab_encoder_d { vec<lto_encoder_entry> nodes; hash_map<symtab_node *, size_t> *map; + + /* Mapping of input order of nodes onto output order. */ + hash_map<int_hash<int, -1, -2>, int> *order_remap; }; typedef struct lto_symtab_encoder_d *lto_symtab_encoder_t; @@ -896,7 +899,7 @@ extern void lto_output_fn_decl_ref (struct lto_out_decl_state *, extern tree lto_input_var_decl_ref (lto_input_block *, lto_file_decl_data *); extern tree lto_input_fn_decl_ref (lto_input_block *, lto_file_decl_data *); extern void lto_output_toplevel_asms (void); -extern void produce_asm (struct output_block *ob, tree fn); +extern void produce_asm (struct output_block *ob); extern void lto_output (); extern void produce_asm_for_decls (); void lto_output_decl_state_streams (struct output_block *, -- 2.47.0