On Wed, 1 Oct 2014, Ilya Verbin wrote:

> On 30 Sep 13:40, Thomas Schwinge wrote:
> > As just discussed for the libgcc changes in
> > <http://news.gmane.org/find-root.php?message_id=%3C87d2ad73ze.fsf%40schwinge.name%3E>,
> > just some suggestions regarding the terminology, where I think that the
> > term »target« might be confusing in comments or symbols' names.  That is,
> > in the following, »target« should possibly be replaced by »offload[ing]«
> > or similar:
> > 
> > What about:
> > 
> >     #define OFFLOAD_SECTION_NAME_PREFIX ".gnu.offload_lto_"
> 
> Renamed, patch is updated.
> 
> Thanks,
>   -- Ilya
> 
> 
> gcc/
>       * cgraph.h (symtab_node): Add need_lto_streaming flag.
>       * cgraphunit.c: Include lto-section-names.h.
>       (initialize_offload): New function.
>       (ipa_passes): Initialize offload and call ipa_write_summaries if there
>       is something to write to OFFLOAD_SECTION_NAME_PREFIX sections.
>       (symbol_table::compile): Call lto_streamer_hooks_init under flag_openmp.
>       * ipa-inline-analysis.c (inline_generate_summary): Do not exit under
>       flag_openmp.
>       (inline_free_summary): Always remove hooks.
>       * lto-cgraph.c (referenced_from_other_partition_p): Ignore references
>       from non-offloadable nodes while streaming a node into offload section.
>       (reachable_from_other_partition_p): Likewise.
>       (select_what_to_stream): New function.
>       (compute_ltrans_boundary): Do not call
>       lto_set_symtab_encoder_in_partition if the node should not be streamed.
>       * lto-section-names.h (OFFLOAD_SECTION_NAME_PREFIX): Define.
>       (section_name_prefix): Declare.
>       * lto-streamer.c (section_name_prefix): New variable.
>       (lto_get_section_name): Use section_name_prefix instead of
>       LTO_SECTION_NAME_PREFIX.
>       * lto-streamer.h (select_what_to_stream): Declare.
>       * omp-low.c (is_targetreg_ctx): New function.
>       (create_omp_child_function, check_omp_nesting_restrictions): Use it.
>       (expand_omp_target): Set mark_force_output for the offloaded functions.
>       (lower_omp_critical): Add target attribute for omp critical symbol.
>       * passes.c (ipa_write_summaries): New argument offload_lto_mode.  Call
>       select_what_to_stream.  Do not call lto_set_symtab_encoder_in_partition
>       if the node should not be streamed out.
>       * tree-pass.h (ipa_write_summaries): New bool argument.
> gcc/lto/
>       * lto-object.c (lto_obj_add_section): Use section_name_prefix instead of
>       LTO_SECTION_NAME_PREFIX.
>       * lto-partition.c (lto_promote_cross_file_statics): Call
>       select_what_to_stream.
>       * lto.c (lto_section_with_id): Use section_name_prefix instead of
>       LTO_SECTION_NAME_PREFIX.
>       (read_cgraph_and_symbols): Read OFFLOAD_SECTION_NAME_PREFIX sections, if
>       being built as an offload compiler.
> 
> ---
> 
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 4fd58a5..df0b0e2 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -444,6 +444,10 @@ public:
>    /* Set when init priority is set.  */
>    unsigned in_init_priority_hash : 1;
>  
> +  /* Set when symbol needs to be streamed into LTO bytecode for LTO, or in 
> case
> +     of offloading, for separate compilation for a different target.  */
> +  unsigned need_lto_streaming : 1;
> +
>  
>    /* Ordering of all symtab entries.  */
>    int order;
> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> index d463505..5eb9d64 100644
> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -211,6 +211,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-nested.h"
>  #include "gimplify.h"
>  #include "dbgcnt.h"
> +#include "lto-section-names.h"
>  
>  /* Queue of cgraph nodes scheduled to be added into cgraph.  This is a
>     secondary queue used during optimization to accommodate passes that
> @@ -1994,9 +1995,40 @@ output_in_order (bool no_reorder)
>    free (nodes);
>  }
>  
> +/* Check whether there is at least one function or global variable to 
> offload.
> +   */
> +
> +static bool
> +initialize_offload (void)
> +{
> +  bool have_offload = false;
> +  struct cgraph_node *node;
> +  struct varpool_node *vnode;
> +
> +  FOR_EACH_DEFINED_FUNCTION (node)
> +    if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES 
> (node->decl)))
> +      {
> +     have_offload = true;
> +     break;
> +      }
> +
> +  FOR_EACH_DEFINED_VARIABLE (vnode)
> +    {
> +      if (!lookup_attribute ("omp declare target",
> +                          DECL_ATTRIBUTES (vnode->decl))
> +       || TREE_CODE (vnode->decl) != VAR_DECL
> +       || DECL_SIZE (vnode->decl) == 0)
> +     continue;
> +      have_offload = true;
> +    }
> +
> +  return have_offload;
> +}
> +

I wonder if we can avoid the above by means of a global have_offload
flag?  (or inside gcc::context)

>  static void
>  ipa_passes (void)
>  {
> +  bool have_offload = false;
>    gcc::pass_manager *passes = g->get_passes ();
>  
>    set_cfun (NULL);
> @@ -2004,6 +2036,14 @@ ipa_passes (void)
>    gimple_register_cfg_hooks ();
>    bitmap_obstack_initialize (NULL);
>  
> +  if (!in_lto_p && flag_openmp)

As -fopenmp is not generally available it's odd to test
flag_openmp (though that is available everywhere as
implementation detail).  Doesn't offloading work
without -fopenmp?

> +    {
> +      have_offload = initialize_offload ();
> +      /* OpenMP offloading requires LTO infrastructure.  */
> +      if (have_offload)
> +     flag_generate_lto = 1;
> +    }
> +
>    invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL);
>  
>    if (!in_lto_p)
> @@ -2041,7 +2081,18 @@ ipa_passes (void)
>      targetm.asm_out.lto_start ();
>  
>    if (!in_lto_p)
> -    ipa_write_summaries ();
> +    {
> +      if (have_offload)
> +     {
> +       section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX;
> +       ipa_write_summaries (true);
> +     }
> +      if (flag_lto)
> +     {
> +       section_name_prefix = LTO_SECTION_NAME_PREFIX;
> +       ipa_write_summaries (false);
> +     }
> +    }
>  
>    if (flag_generate_lto)
>      targetm.asm_out.lto_end ();
> @@ -2122,7 +2173,7 @@ symbol_table::compile (void)
>    state = IPA;
>  
>    /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE.  */
> -  if (flag_lto)
> +  if (flag_lto || flag_openmp)

flag_generate_lto?

>      lto_streamer_hooks_init ();
>  
>    /* Don't run the IPA passes if there was any error or sorry messages.  */
> diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c
> index 38f56d2..076a1e8 100644
> --- a/gcc/ipa-inline-analysis.c
> +++ b/gcc/ipa-inline-analysis.c
> @@ -4010,7 +4010,7 @@ inline_generate_summary (void)
>  
>    /* When not optimizing, do not bother to analyze.  Inlining is still done
>       because edge redirection needs to happen there.  */
> -  if (!optimize && !flag_lto && !flag_wpa)
> +  if (!optimize && !flag_lto && !flag_wpa && !flag_openmp)
>      return;

Likewise !flag_generate_lto

>    function_insertion_hook_holder =
> @@ -4325,11 +4325,6 @@ void
>  inline_free_summary (void)
>  {
>    struct cgraph_node *node;
> -  if (!inline_edge_summary_vec.exists ())
> -    return;
> -  FOR_EACH_DEFINED_FUNCTION (node)
> -    if (!node->alias)
> -      reset_inline_summary (node);
>    if (function_insertion_hook_holder)
>      symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder);
>    function_insertion_hook_holder = NULL;
> @@ -4345,6 +4340,11 @@ inline_free_summary (void)
>    if (edge_duplication_hook_holder)
>      symtab->remove_edge_duplication_hook (edge_duplication_hook_holder);
>    edge_duplication_hook_holder = NULL;
> +  if (!inline_edge_summary_vec.exists ())
> +    return;
> +  FOR_EACH_DEFINED_FUNCTION (node)
> +    if (!node->alias)
> +      reset_inline_summary (node);
>    vec_free (inline_summary_vec);
>    inline_edge_summary_vec.release ();
>    if (edge_predicate_pool)
> diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
> index 0584946..c1fccfa 100644
> --- a/gcc/lto-cgraph.c
> +++ b/gcc/lto-cgraph.c
> @@ -321,6 +321,11 @@ referenced_from_other_partition_p (symtab_node *node, 
> lto_symtab_encoder_t encod
>  
>    for (i = 0; node->iterate_referring (i, ref); i++)
>      {
> +      /* Ignore references from non-offloadable nodes while streaming NODE 
> into
> +      offload LTO section.  */
> +      if (!ref->referring->need_lto_streaming)
> +     continue;
> +
>        if (ref->referring->in_other_partition
>            || !lto_symtab_encoder_in_partition_p (encoder, ref->referring))
>       return true;
> @@ -339,9 +344,16 @@ reachable_from_other_partition_p (struct cgraph_node 
> *node, lto_symtab_encoder_t
>    if (node->global.inlined_to)
>      return false;
>    for (e = node->callers; e; e = e->next_caller)
> -    if (e->caller->in_other_partition
> -     || !lto_symtab_encoder_in_partition_p (encoder, e->caller))
> -      return true;
> +    {
> +      /* Ignore references from non-offloadable nodes while streaming NODE 
> into
> +      offload LTO section.  */
> +      if (!e->caller->need_lto_streaming)
> +     continue;
> +
> +      if (e->caller->in_other_partition
> +       || !lto_symtab_encoder_in_partition_p (encoder, e->caller))
> +     return true;
> +    }
>    return false;
>  }
>  
> @@ -802,6 +814,18 @@ create_references (lto_symtab_encoder_t encoder, 
> symtab_node *node)
>        lto_symtab_encoder_encode (encoder, ref->referred);
>  }
>  
> +/* Select what needs to be streamed out.  In regular lto mode stream 
> everything.
> +   In offload lto mode stream only stuff marked with an attribute.  */
> +void
> +select_what_to_stream (bool offload_lto_mode)
> +{
> +  struct symtab_node *snode;
> +  FOR_EACH_SYMBOL (snode)
> +    snode->need_lto_streaming
> +      = !offload_lto_mode || lookup_attribute ("omp declare target",
> +                                            DECL_ATTRIBUTES (snode->decl));

I suppose I suggested this already earlier this year.  Why keep this
artificial attribute when you have a cgraph node flag?

> +}
> +
>  /* Find all symbols we want to stream into given partition and insert them
>     to encoders.
>  
> @@ -828,6 +852,8 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder)
>         !lsei_end_p (lsei); lsei_next_function_in_partition (&lsei))
>      {
>        struct cgraph_node *node = lsei_cgraph_node (lsei);
> +      if (!node->need_lto_streaming)
> +     continue;
>        add_node_to (encoder, node, true);
>        lto_set_symtab_encoder_in_partition (encoder, node);
>        create_references (encoder, node);
> @@ -844,6 +870,8 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder)
>      {
>        varpool_node *vnode = lsei_varpool_node (lsei);
>  
> +      if (!vnode->need_lto_streaming)
> +     continue;
>        lto_set_symtab_encoder_in_partition (encoder, vnode);
>        lto_set_symtab_encoder_encode_initializer (encoder, vnode);
>        create_references (encoder, vnode);

I leave all the partitioning stuff to Honza to review.

> diff --git a/gcc/lto-section-names.h b/gcc/lto-section-names.h
> index cb75230..f5dbed2 100644
> --- a/gcc/lto-section-names.h
> +++ b/gcc/lto-section-names.h
> @@ -25,6 +25,11 @@ along with GCC; see the file COPYING3.  If not see
>     name for the functions and static_initializers.  For other types of
>     sections a '.' and the section type are appended.  */
>  #define LTO_SECTION_NAME_PREFIX ".gnu.lto_"
> +#define OFFLOAD_SECTION_NAME_PREFIX ".gnu.offload_lto_"
> +
> +/* Can be either OFFLOAD_SECTION_NAME_PREFIX when we stream IR for offload
> +   compiler, or LTO_SECTION_NAME_PREFIX for LTO case.  */
> +extern const char *section_name_prefix;
>  
>  /* Segment name for LTO sections.  This is only used for Mach-O.  */
>  
> diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c
> index 3480723..161e12d 100644
> --- a/gcc/lto-streamer.c
> +++ b/gcc/lto-streamer.c
> @@ -48,6 +48,7 @@ struct lto_stats_d lto_stats;
>  static bitmap_obstack lto_obstack;
>  static bool lto_obstack_initialized;
>  
> +const char *section_name_prefix = LTO_SECTION_NAME_PREFIX;
>  
>  /* Return a string representing LTO tag TAG.  */
>  
> @@ -177,7 +178,7 @@ lto_get_section_name (int section_type, const char *name, 
> struct lto_file_decl_d
>      sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, f->id);
>    else
>      sprintf (post, "." HOST_WIDE_INT_PRINT_HEX_PURE, get_random_seed 
> (false)); 
> -  return concat (LTO_SECTION_NAME_PREFIX, sep, add, post, NULL);
> +  return concat (section_name_prefix, sep, add, post, NULL);
>  }
>  
>  
> diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
> index 4bec969..ba00ab4 100644
> --- a/gcc/lto-streamer.h
> +++ b/gcc/lto-streamer.h
> @@ -831,6 +831,7 @@ bool referenced_from_this_partition_p (symtab_node *,
>  bool reachable_from_this_partition_p (struct cgraph_node *,
>                                     lto_symtab_encoder_t);
>  lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder);
> +void select_what_to_stream (bool);
>  
>  
>  /* In lto-symtab.c.  */
> diff --git a/gcc/lto/lto-object.c b/gcc/lto/lto-object.c
> index 323f7b2..4ee752f 100644
> --- a/gcc/lto/lto-object.c
> +++ b/gcc/lto/lto-object.c
> @@ -230,8 +230,7 @@ lto_obj_add_section (void *data, const char *name, off_t 
> offset,
>    void **slot;
>    struct lto_section_list *list = loasd->list;
>  
> -  if (strncmp (name, LTO_SECTION_NAME_PREFIX,
> -            strlen (LTO_SECTION_NAME_PREFIX)) != 0)
> +  if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
>      return 1;
>  
>    new_name = xstrdup (name);
> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
> index 0451a66..aae2be9 100644
> --- a/gcc/lto/lto-partition.c
> +++ b/gcc/lto/lto-partition.c
> @@ -920,6 +920,8 @@ lto_promote_cross_file_statics (void)
>  
>    gcc_assert (flag_wpa);
>  
> +  select_what_to_stream (false);
> +
>    /* First compute boundaries.  */
>    n_sets = ltrans_partitions.length ();
>    for (i = 0; i < n_sets; i++)
> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
> index 6cbb178..0646da5 100644
> --- a/gcc/lto/lto.c
> +++ b/gcc/lto/lto.c
> @@ -2125,7 +2125,7 @@ lto_section_with_id (const char *name, unsigned 
> HOST_WIDE_INT *id)
>  {
>    const char *s;
>  
> -  if (strncmp (name, LTO_SECTION_NAME_PREFIX, strlen 
> (LTO_SECTION_NAME_PREFIX)))
> +  if (strncmp (name, section_name_prefix, strlen (section_name_prefix)))
>      return 0;
>    s = strrchr (name, '.');
>    return s && sscanf (s, "." HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1;
> @@ -2899,6 +2899,10 @@ read_cgraph_and_symbols (unsigned nfiles, const char 
> **fnames)
>  
>    timevar_push (TV_IPA_LTO_DECL_IN);
>  
> +#ifdef ACCEL_COMPILER
> +    section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX;
> +#endif
> +
>    real_file_decl_data
>      = decl_data = ggc_cleared_vec_alloc<lto_file_decl_data_ptr> (nfiles + 1);
>    real_file_count = nfiles;
> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> index eb0a7ee..6156e2f 100644
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -257,6 +257,16 @@ is_parallel_ctx (omp_context *ctx)
>  }
>  
>  
> +/* Return true if CTX is for an omp target region.  */
> +
> +static inline bool
> +is_targetreg_ctx (omp_context *ctx)
> +{
> +  return gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
> +      && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION;
> +}
> +
> +
>  /* Return true if CTX is for an omp task.  */
>  
>  static inline bool
> @@ -1930,9 +1940,7 @@ create_omp_child_function (omp_context *ctx, bool 
> task_copy)
>      {
>        omp_context *octx;
>        for (octx = ctx; octx; octx = octx->outer)
> -     if (gimple_code (octx->stmt) == GIMPLE_OMP_TARGET
> -         && gimple_omp_target_kind (octx->stmt)
> -            == GF_OMP_TARGET_KIND_REGION)
> +     if (is_targetreg_ctx (octx))
>         {
>           target_p = true;
>           break;
> @@ -2588,8 +2596,7 @@ check_omp_nesting_restrictions (gimple stmt, 
> omp_context *ctx)
>        break;
>      case GIMPLE_OMP_TARGET:
>        for (; ctx != NULL; ctx = ctx->outer)
> -     if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET
> -         && gimple_omp_target_kind (ctx->stmt) == GF_OMP_TARGET_KIND_REGION)
> +     if (is_targetreg_ctx (ctx))
>         {
>           const char *name;
>           switch (gimple_omp_target_kind (stmt))
> @@ -8206,6 +8213,7 @@ expand_omp_target (struct omp_region *region)
>    if (kind == GF_OMP_TARGET_KIND_REGION)
>      {
>        unsigned srcidx, dstidx, num;
> +      struct cgraph_node *node;
>  
>        /* If the target region needs data sent from the parent
>        function, then the very first statement (except possible
> @@ -8337,6 +8345,11 @@ expand_omp_target (struct omp_region *region)
>        push_cfun (child_cfun);
>        cgraph_edge::rebuild_edges ();
>  
> +      /* Prevent IPA from removing child_fn as unreachable, since there are 
> no
> +      refs from the parent function to child_fn in offload LTO mode.  */
> +      node = cgraph_node::get (child_fn);
> +      node->mark_force_output ();
> +
>        /* Some EH regions might become dead, see PR34608.  If
>        pass_cleanup_cfg isn't the first pass to happen with the
>        new child, these dead EH edges might cause problems.
> @@ -9207,6 +9220,19 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, 
> omp_context *ctx)
>         DECL_COMMON (decl) = 1;
>         DECL_ARTIFICIAL (decl) = 1;
>         DECL_IGNORED_P (decl) = 1;
> +
> +       /* If '#pragma omp critical' is inside target region, the symbol must
> +          have an 'omp declare target' attribute.  */
> +       omp_context *octx;
> +       for (octx = ctx->outer; octx; octx = octx->outer)
> +         if (is_targetreg_ctx (octx))
> +           {
> +             DECL_ATTRIBUTES (decl)
> +               = tree_cons (get_identifier ("omp declare target"),
> +                            NULL_TREE, DECL_ATTRIBUTES (decl));

Here - why not set a flag on cgraph_get_node (decl) instead?

LTO bits are ok apart from the way you split partitioning which I
leave to Honza.  Also see my suggestion about that odd "omp declare 
target" attribute.

Thanks,
Richard.

> +             break;
> +           }
> +
>         varpool_node::finalize_decl (decl);
>  
>         splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
> diff --git a/gcc/passes.c b/gcc/passes.c
> index 5001c3d..0d5667d 100644
> --- a/gcc/passes.c
> +++ b/gcc/passes.c
> @@ -2297,7 +2297,7 @@ ipa_write_summaries_1 (lto_symtab_encoder_t encoder)
>  /* Write out summaries for all the nodes in the callgraph.  */
>  
>  void
> -ipa_write_summaries (void)
> +ipa_write_summaries (bool offload_lto_mode)
>  {
>    lto_symtab_encoder_t encoder;
>    int i, order_pos;
> @@ -2308,6 +2308,8 @@ ipa_write_summaries (void)
>    if (!flag_generate_lto || seen_error ())
>      return;
>  
> +  select_what_to_stream (offload_lto_mode);
> +
>    encoder = lto_symtab_encoder_new (false);
>  
>    /* Create the callgraph set in the same order used in
> @@ -2334,15 +2336,16 @@ ipa_write_summaries (void)
>         renumber_gimple_stmt_uids ();
>         pop_cfun ();
>       }
> -      if (node->definition)
> +      if (node->definition && node->need_lto_streaming)
>          lto_set_symtab_encoder_in_partition (encoder, node);
>      }
>  
>    FOR_EACH_DEFINED_FUNCTION (node)
> -    if (node->alias)
> +    if (node->alias && node->need_lto_streaming)
>        lto_set_symtab_encoder_in_partition (encoder, node);
>    FOR_EACH_DEFINED_VARIABLE (vnode)
> -    lto_set_symtab_encoder_in_partition (encoder, vnode);
> +    if (vnode->need_lto_streaming)
> +      lto_set_symtab_encoder_in_partition (encoder, vnode);
>  
>    ipa_write_summaries_1 (compute_ltrans_boundary (encoder));
>  
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index ed109c3..0bc5ca1 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -592,7 +592,7 @@ extern void pass_fini_dump_file (opt_pass *);
>  extern const char *get_current_pass_name (void);
>  extern void print_current_pass (FILE *);
>  extern void debug_pass (void);
> -extern void ipa_write_summaries (void);
> +extern void ipa_write_summaries (bool);
>  extern void ipa_write_optimization_summaries (struct lto_symtab_encoder_d *);
>  extern void ipa_read_summaries (void);
>  extern void ipa_read_optimization_summaries (void);
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer

Reply via email to