On Wed, 1 Oct 2014, Ilya Verbin wrote: > On 30 Sep 13:40, Thomas Schwinge wrote: > > As just discussed for the libgcc changes in > > <http://news.gmane.org/find-root.php?message_id=%3C87d2ad73ze.fsf%40schwinge.name%3E>, > > just some suggestions regarding the terminology, where I think that the > > term »target« might be confusing in comments or symbols' names. That is, > > in the following, »target« should possibly be replaced by »offload[ing]« > > or similar: > > > > What about: > > > > #define OFFLOAD_SECTION_NAME_PREFIX ".gnu.offload_lto_" > > Renamed, patch is updated. > > Thanks, > -- Ilya > > > gcc/ > * cgraph.h (symtab_node): Add need_lto_streaming flag. > * cgraphunit.c: Include lto-section-names.h. > (initialize_offload): New function. > (ipa_passes): Initialize offload and call ipa_write_summaries if there > is something to write to OFFLOAD_SECTION_NAME_PREFIX sections. > (symbol_table::compile): Call lto_streamer_hooks_init under flag_openmp. > * ipa-inline-analysis.c (inline_generate_summary): Do not exit under > flag_openmp. > (inline_free_summary): Always remove hooks. > * lto-cgraph.c (referenced_from_other_partition_p): Ignore references > from non-offloadable nodes while streaming a node into offload section. > (reachable_from_other_partition_p): Likewise. > (select_what_to_stream): New function. > (compute_ltrans_boundary): Do not call > lto_set_symtab_encoder_in_partition if the node should not be streamed. > * lto-section-names.h (OFFLOAD_SECTION_NAME_PREFIX): Define. > (section_name_prefix): Declare. > * lto-streamer.c (section_name_prefix): New variable. > (lto_get_section_name): Use section_name_prefix instead of > LTO_SECTION_NAME_PREFIX. > * lto-streamer.h (select_what_to_stream): Declare. > * omp-low.c (is_targetreg_ctx): New function. > (create_omp_child_function, check_omp_nesting_restrictions): Use it. > (expand_omp_target): Set mark_force_output for the offloaded functions. > (lower_omp_critical): Add target attribute for omp critical symbol. > * passes.c (ipa_write_summaries): New argument offload_lto_mode. Call > select_what_to_stream. Do not call lto_set_symtab_encoder_in_partition > if the node should not be streamed out. > * tree-pass.h (ipa_write_summaries): New bool argument. > gcc/lto/ > * lto-object.c (lto_obj_add_section): Use section_name_prefix instead of > LTO_SECTION_NAME_PREFIX. > * lto-partition.c (lto_promote_cross_file_statics): Call > select_what_to_stream. > * lto.c (lto_section_with_id): Use section_name_prefix instead of > LTO_SECTION_NAME_PREFIX. > (read_cgraph_and_symbols): Read OFFLOAD_SECTION_NAME_PREFIX sections, if > being built as an offload compiler. > > --- > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h > index 4fd58a5..df0b0e2 100644 > --- a/gcc/cgraph.h > +++ b/gcc/cgraph.h > @@ -444,6 +444,10 @@ public: > /* Set when init priority is set. */ > unsigned in_init_priority_hash : 1; > > + /* Set when symbol needs to be streamed into LTO bytecode for LTO, or in > case > + of offloading, for separate compilation for a different target. */ > + unsigned need_lto_streaming : 1; > + > > /* Ordering of all symtab entries. */ > int order; > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > index d463505..5eb9d64 100644 > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -211,6 +211,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-nested.h" > #include "gimplify.h" > #include "dbgcnt.h" > +#include "lto-section-names.h" > > /* Queue of cgraph nodes scheduled to be added into cgraph. This is a > secondary queue used during optimization to accommodate passes that > @@ -1994,9 +1995,40 @@ output_in_order (bool no_reorder) > free (nodes); > } > > +/* Check whether there is at least one function or global variable to > offload. > + */ > + > +static bool > +initialize_offload (void) > +{ > + bool have_offload = false; > + struct cgraph_node *node; > + struct varpool_node *vnode; > + > + FOR_EACH_DEFINED_FUNCTION (node) > + if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES > (node->decl))) > + { > + have_offload = true; > + break; > + } > + > + FOR_EACH_DEFINED_VARIABLE (vnode) > + { > + if (!lookup_attribute ("omp declare target", > + DECL_ATTRIBUTES (vnode->decl)) > + || TREE_CODE (vnode->decl) != VAR_DECL > + || DECL_SIZE (vnode->decl) == 0) > + continue; > + have_offload = true; > + } > + > + return have_offload; > +} > +
I wonder if we can avoid the above by means of a global have_offload flag? (or inside gcc::context) > static void > ipa_passes (void) > { > + bool have_offload = false; > gcc::pass_manager *passes = g->get_passes (); > > set_cfun (NULL); > @@ -2004,6 +2036,14 @@ ipa_passes (void) > gimple_register_cfg_hooks (); > bitmap_obstack_initialize (NULL); > > + if (!in_lto_p && flag_openmp) As -fopenmp is not generally available it's odd to test flag_openmp (though that is available everywhere as implementation detail). Doesn't offloading work without -fopenmp? > + { > + have_offload = initialize_offload (); > + /* OpenMP offloading requires LTO infrastructure. */ > + if (have_offload) > + flag_generate_lto = 1; > + } > + > invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL); > > if (!in_lto_p) > @@ -2041,7 +2081,18 @@ ipa_passes (void) > targetm.asm_out.lto_start (); > > if (!in_lto_p) > - ipa_write_summaries (); > + { > + if (have_offload) > + { > + section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; > + ipa_write_summaries (true); > + } > + if (flag_lto) > + { > + section_name_prefix = LTO_SECTION_NAME_PREFIX; > + ipa_write_summaries (false); > + } > + } > > if (flag_generate_lto) > targetm.asm_out.lto_end (); > @@ -2122,7 +2173,7 @@ symbol_table::compile (void) > state = IPA; > > /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ > - if (flag_lto) > + if (flag_lto || flag_openmp) flag_generate_lto? > lto_streamer_hooks_init (); > > /* Don't run the IPA passes if there was any error or sorry messages. */ > diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c > index 38f56d2..076a1e8 100644 > --- a/gcc/ipa-inline-analysis.c > +++ b/gcc/ipa-inline-analysis.c > @@ -4010,7 +4010,7 @@ inline_generate_summary (void) > > /* When not optimizing, do not bother to analyze. Inlining is still done > because edge redirection needs to happen there. */ > - if (!optimize && !flag_lto && !flag_wpa) > + if (!optimize && !flag_lto && !flag_wpa && !flag_openmp) > return; Likewise !flag_generate_lto > function_insertion_hook_holder = > @@ -4325,11 +4325,6 @@ void > inline_free_summary (void) > { > struct cgraph_node *node; > - if (!inline_edge_summary_vec.exists ()) > - return; > - FOR_EACH_DEFINED_FUNCTION (node) > - if (!node->alias) > - reset_inline_summary (node); > if (function_insertion_hook_holder) > symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder); > function_insertion_hook_holder = NULL; > @@ -4345,6 +4340,11 @@ inline_free_summary (void) > if (edge_duplication_hook_holder) > symtab->remove_edge_duplication_hook (edge_duplication_hook_holder); > edge_duplication_hook_holder = NULL; > + if (!inline_edge_summary_vec.exists ()) > + return; > + FOR_EACH_DEFINED_FUNCTION (node) > + if (!node->alias) > + reset_inline_summary (node); > vec_free (inline_summary_vec); > inline_edge_summary_vec.release (); > if (edge_predicate_pool) > diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c > index 0584946..c1fccfa 100644 > --- a/gcc/lto-cgraph.c > +++ b/gcc/lto-cgraph.c > @@ -321,6 +321,11 @@ referenced_from_other_partition_p (symtab_node *node, > lto_symtab_encoder_t encod > > for (i = 0; node->iterate_referring (i, ref); i++) > { > + /* Ignore references from non-offloadable nodes while streaming NODE > into > + offload LTO section. */ > + if (!ref->referring->need_lto_streaming) > + continue; > + > if (ref->referring->in_other_partition > || !lto_symtab_encoder_in_partition_p (encoder, ref->referring)) > return true; > @@ -339,9 +344,16 @@ reachable_from_other_partition_p (struct cgraph_node > *node, lto_symtab_encoder_t > if (node->global.inlined_to) > return false; > for (e = node->callers; e; e = e->next_caller) > - if (e->caller->in_other_partition > - || !lto_symtab_encoder_in_partition_p (encoder, e->caller)) > - return true; > + { > + /* Ignore references from non-offloadable nodes while streaming NODE > into > + offload LTO section. */ > + if (!e->caller->need_lto_streaming) > + continue; > + > + if (e->caller->in_other_partition > + || !lto_symtab_encoder_in_partition_p (encoder, e->caller)) > + return true; > + } > return false; > } > > @@ -802,6 +814,18 @@ create_references (lto_symtab_encoder_t encoder, > symtab_node *node) > lto_symtab_encoder_encode (encoder, ref->referred); > } > > +/* Select what needs to be streamed out. In regular lto mode stream > everything. > + In offload lto mode stream only stuff marked with an attribute. */ > +void > +select_what_to_stream (bool offload_lto_mode) > +{ > + struct symtab_node *snode; > + FOR_EACH_SYMBOL (snode) > + snode->need_lto_streaming > + = !offload_lto_mode || lookup_attribute ("omp declare target", > + DECL_ATTRIBUTES (snode->decl)); I suppose I suggested this already earlier this year. Why keep this artificial attribute when you have a cgraph node flag? > +} > + > /* Find all symbols we want to stream into given partition and insert them > to encoders. > > @@ -828,6 +852,8 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder) > !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei)) > { > struct cgraph_node *node = lsei_cgraph_node (lsei); > + if (!node->need_lto_streaming) > + continue; > add_node_to (encoder, node, true); > lto_set_symtab_encoder_in_partition (encoder, node); > create_references (encoder, node); > @@ -844,6 +870,8 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder) > { > varpool_node *vnode = lsei_varpool_node (lsei); > > + if (!vnode->need_lto_streaming) > + continue; > lto_set_symtab_encoder_in_partition (encoder, vnode); > lto_set_symtab_encoder_encode_initializer (encoder, vnode); > create_references (encoder, vnode); I leave all the partitioning stuff to Honza to review. > diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h > index cb75230..f5dbed2 100644 > --- a/gcc/lto-section-names.h > +++ b/gcc/lto-section-names.h > @@ -25,6 +25,11 @@ along with GCC; see the file COPYING3. If not see > name for the functions and static_initializers. For other types of > sections a '.' and the section type are appended. */ > #define LTO_SECTION_NAME_PREFIX ".gnu.lto_" > +#define OFFLOAD_SECTION_NAME_PREFIX ".gnu.offload_lto_" > + > +/* Can be either OFFLOAD_SECTION_NAME_PREFIX when we stream IR for offload > + compiler, or LTO_SECTION_NAME_PREFIX for LTO case. */ > +extern const char *section_name_prefix; > > /* Segment name for LTO sections. This is only used for Mach-O. */ > > diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c > index 3480723..161e12d 100644 > --- a/gcc/lto-streamer.c > +++ b/gcc/lto-streamer.c > @@ -48,6 +48,7 @@ struct lto_stats_d lto_stats; > static bitmap_obstack lto_obstack; > static bool lto_obstack_initialized; > > +const char *section_name_prefix = LTO_SECTION_NAME_PREFIX; > > /* Return a string representing LTO tag TAG. */ > > @@ -177,7 +178,7 @@ lto_get_section_name (int section_type, const char *name, > struct lto_file_decl_d > sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id); > else > sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed > (false)); > - return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL); > + return concat (section_name_prefix, sep, add, post, NULL); > } > > > diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h > index 4bec969..ba00ab4 100644 > --- a/gcc/lto-streamer.h > +++ b/gcc/lto-streamer.h > @@ -831,6 +831,7 @@ bool referenced_from_this_partition_p (symtab_node *, > bool reachable_from_this_partition_p (struct cgraph_node *, > lto_symtab_encoder_t); > lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder); > +void select_what_to_stream (bool); > > > /* In lto-symtab.c. */ > diff --git a/gcc/lto/lto-object.c b/gcc/lto/lto-object.c > index 323f7b2..4ee752f 100644 > --- a/gcc/lto/lto-object.c > +++ b/gcc/lto/lto-object.c > @@ -230,8 +230,7 @@ lto_obj_add_section (void *data, const char *name, off_t > offset, > void **slot; > struct lto_section_list *list = loasd->list; > > - if (strncmp (name, LTO_SECTION_NAME_PREFIX, > - strlen (LTO_SECTION_NAME_PREFIX)) != 0) > + if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) > return 1; > > new_name = xstrdup (name); > diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c > index 0451a66..aae2be9 100644 > --- a/gcc/lto/lto-partition.c > +++ b/gcc/lto/lto-partition.c > @@ -920,6 +920,8 @@ lto_promote_cross_file_statics (void) > > gcc_assert (flag_wpa); > > + select_what_to_stream (false); > + > /* First compute boundaries. */ > n_sets = ltrans_partitions.length (); > for (i = 0; i < n_sets; i++) > diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c > index 6cbb178..0646da5 100644 > --- a/gcc/lto/lto.c > +++ b/gcc/lto/lto.c > @@ -2125,7 +2125,7 @@ lto_section_with_id (const char *name, unsigned > HOST_WIDE_INT *id) > { > const char *s; > > - if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen > (LTO_SECTION_NAME_PREFIX))) > + if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) > return 0; > s = strrchr (name, '.'); > return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1; > @@ -2899,6 +2899,10 @@ read_cgraph_and_symbols (unsigned nfiles, const char > **fnames) > > timevar_push (TV_IPA_LTO_DECL_IN); > > +#ifdef ACCEL_COMPILER > + section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; > +#endif > + > real_file_decl_data > = decl_data = ggc_cleared_vec_alloc<lto_file_decl_data_ptr> (nfiles + 1); > real_file_count = nfiles; > diff --git a/gcc/omp-low.c b/gcc/omp-low.c > index eb0a7ee..6156e2f 100644 > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -257,6 +257,16 @@ is_parallel_ctx (omp_context *ctx) > } > > > +/* Return true if CTX is for an omp target region. */ > + > +static inline bool > +is_targetreg_ctx (omp_context *ctx) > +{ > + return gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET > + && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION; > +} > + > + > /* Return true if CTX is for an omp task. */ > > static inline bool > @@ -1930,9 +1940,7 @@ create_omp_child_function (omp_context *ctx, bool > task_copy) > { > omp_context *octx; > for (octx = ctx; octx; octx = octx->outer) > - if (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET > - && gimple_omp_target_kind (octx->stmt) > - == GF_OMP_TARGET_KIND_REGION) > + if (is_targetreg_ctx (octx)) > { > target_p = true; > break; > @@ -2588,8 +2596,7 @@ check_omp_nesting_restrictions (gimple stmt, > omp_context *ctx) > break; > case GIMPLE_OMP_TARGET: > for (; ctx != NULL; ctx = ctx->outer) > - if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET > - && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION) > + if (is_targetreg_ctx (ctx)) > { > const char *name; > switch (gimple_omp_target_kind (stmt)) > @@ -8206,6 +8213,7 @@ expand_omp_target (struct omp_region *region) > if (kind == GF_OMP_TARGET_KIND_REGION) > { > unsigned srcidx, dstidx, num; > + struct cgraph_node *node; > > /* If the target region needs data sent from the parent > function, then the very first statement (except possible > @@ -8337,6 +8345,11 @@ expand_omp_target (struct omp_region *region) > push_cfun (child_cfun); > cgraph_edge::rebuild_edges (); > > + /* Prevent IPA from removing child_fn as unreachable, since there are > no > + refs from the parent function to child_fn in offload LTO mode. */ > + node = cgraph_node::get (child_fn); > + node->mark_force_output (); > + > /* Some EH regions might become dead, see PR34608. If > pass_cleanup_cfg isn't the first pass to happen with the > new child, these dead EH edges might cause problems. > @@ -9207,6 +9220,19 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, > omp_context *ctx) > DECL_COMMON (decl) = 1; > DECL_ARTIFICIAL (decl) = 1; > DECL_IGNORED_P (decl) = 1; > + > + /* If '#pragma omp critical' is inside target region, the symbol must > + have an 'omp declare target' attribute. */ > + omp_context *octx; > + for (octx = ctx->outer; octx; octx = octx->outer) > + if (is_targetreg_ctx (octx)) > + { > + DECL_ATTRIBUTES (decl) > + = tree_cons (get_identifier ("omp declare target"), > + NULL_TREE, DECL_ATTRIBUTES (decl)); Here - why not set a flag on cgraph_get_node (decl) instead? LTO bits are ok apart from the way you split partitioning which I leave to Honza. Also see my suggestion about that odd "omp declare target" attribute. Thanks, Richard. > + break; > + } > + > varpool_node::finalize_decl (decl); > > splay_tree_insert (critical_name_mutexes, (splay_tree_key) name, > diff --git a/gcc/passes.c b/gcc/passes.c > index 5001c3d..0d5667d 100644 > --- a/gcc/passes.c > +++ b/gcc/passes.c > @@ -2297,7 +2297,7 @@ ipa_write_summaries_1 (lto_symtab_encoder_t encoder) > /* Write out summaries for all the nodes in the callgraph. */ > > void > -ipa_write_summaries (void) > +ipa_write_summaries (bool offload_lto_mode) > { > lto_symtab_encoder_t encoder; > int i, order_pos; > @@ -2308,6 +2308,8 @@ ipa_write_summaries (void) > if (!flag_generate_lto || seen_error ()) > return; > > + select_what_to_stream (offload_lto_mode); > + > encoder = lto_symtab_encoder_new (false); > > /* Create the callgraph set in the same order used in > @@ -2334,15 +2336,16 @@ ipa_write_summaries (void) > renumber_gimple_stmt_uids (); > pop_cfun (); > } > - if (node->definition) > + if (node->definition && node->need_lto_streaming) > lto_set_symtab_encoder_in_partition (encoder, node); > } > > FOR_EACH_DEFINED_FUNCTION (node) > - if (node->alias) > + if (node->alias && node->need_lto_streaming) > lto_set_symtab_encoder_in_partition (encoder, node); > FOR_EACH_DEFINED_VARIABLE (vnode) > - lto_set_symtab_encoder_in_partition (encoder, vnode); > + if (vnode->need_lto_streaming) > + lto_set_symtab_encoder_in_partition (encoder, vnode); > > ipa_write_summaries_1 (compute_ltrans_boundary (encoder)); > > diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h > index ed109c3..0bc5ca1 100644 > --- a/gcc/tree-pass.h > +++ b/gcc/tree-pass.h > @@ -592,7 +592,7 @@ extern void pass_fini_dump_file (opt_pass *); > extern const char *get_current_pass_name (void); > extern void print_current_pass (FILE *); > extern void debug_pass (void); > -extern void ipa_write_summaries (void); > +extern void ipa_write_summaries (bool); > extern void ipa_write_optimization_summaries (struct lto_symtab_encoder_d *); > extern void ipa_read_summaries (void); > extern void ipa_read_optimization_summaries (void); > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer