On Wed, Jun 29, 2022 at 04:33:02PM +0200, Tobias Burnus wrote:
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -2488,6 +2488,12 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
> fndef_ok,
>         break;
>       }
>  
> +      if (flag_openmp
> +       && lookup_attribute ("omp declare target",
> +                            DECL_ATTRIBUTES (current_function_decl)))
> +     omp_requires_mask
> +       = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
> +
>        if (DECL_DECLARED_INLINE_P (current_function_decl))
>          tv = TV_PARSE_INLINE;
>        else

I thought the above would be left out, at least until clarified what exactly
we mean with device routines in the restrictions.

> --- a/gcc/cp/parser.cc
> +++ b/gcc/cp/parser.cc
> @@ -15389,6 +15389,11 @@ cp_parser_simple_declaration (cp_parser* parser,
>         /* Otherwise, we're done with the list of declarators.  */
>         else
>           {
> +           if (flag_openmp && lookup_attribute ("omp declare target",
> +                                                DECL_ATTRIBUTES (decl)))
> +             omp_requires_mask
> +               = (enum omp_requires) (omp_requires_mask
> +                                      | OMP_REQUIRES_TARGET_USED);
>             pop_deferring_access_checks ();
>             cp_finalize_omp_declare_simd (parser, &odsd);
>             return;

And the above too.

On the Fortran side I don't see it being done.
> --- a/gcc/lto-cgraph.cc
> +++ b/gcc/lto-cgraph.cc
> @@ -1068,12 +1069,28 @@ read_string (class lto_input_block *ib)
>  void
>  output_offload_tables (void)
>  {
> -  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
> +  bool output_requires = (flag_openmp
> +                       && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 
> 0);
> +  if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)
> +      && !output_requires)
>      return;
>  
>    struct lto_simple_output_block *ob
>      = lto_create_simple_output_block (LTO_section_offload_table);
>  
> +  if (output_requires)
> +    {
> +      HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask
> +                        & (OMP_REQUIRES_UNIFIED_ADDRESS
> +                           | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
> +                           | OMP_REQUIRES_REVERSE_OFFLOAD
> +                           | OMP_REQUIRES_TARGET_USED));

Why is the OMP_REQUIRES_TARGET_USED bit saved there?  It is always set
if output_requires...
If we want to make sure it is set in omp_requires_mask after streaming
in, we can just or it in back there.

> @@ -1811,6 +1844,24 @@ input_offload_tables (bool do_force_output)
>             if (do_force_output)
>               varpool_node::get (var_decl)->force_output = 1;
>           }
> +       else if (tag == LTO_symtab_edge)
> +         {
> +           static bool error_emitted = false;
> +           HOST_WIDE_INT val = streamer_read_hwi (ib);
> +
> +           if (omp_requires_mask == 0)
> +             omp_requires_mask = (omp_requires) val;

I mean here: (omp_requires) (val | OMP_REQUIRES_TARGET_USED);

> +           else if (omp_requires_mask != val && !error_emitted)
> +             {
> +               char buf[64], buf2[64];
> +               omp_requires_to_name (buf, sizeof (buf), omp_requires_mask);
> +               omp_requires_to_name (buf2, sizeof (buf2), val);
> +               error ("OpenMP %<requires%> directive with non-identical "
> +                      "clauses in multiple compilation units: %qs vs. %qs",
> +                      buf, buf2);

I think the user should be told also where, so file_data->file_name and
saved another filename from the if (omp_requires_mask == 0) body.
I admit I haven't investigated whether it would be enough to just record
the const char * file_data->file_name or whether what it points to might be
freed before we report it.

> +               error_emitted = true;

> --- a/gcc/omp-offload.cc
> +++ b/gcc/omp-offload.cc
> @@ -398,6 +399,26 @@ omp_finish_file (void)
>    unsigned num_funcs = vec_safe_length (offload_funcs);
>    unsigned num_vars = vec_safe_length (offload_vars);
>  
> +#ifndef ACCEL_COMPILER
> +  if (flag_openmp && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 0)
> +    {
> +      tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
> +                              get_identifier ("__offload_requires_mask"),
> +                              unsigned_type_node);
> +      TREE_PUBLIC (var) = 1;
> +      TREE_STATIC (var) = 1;
> +      TREE_READONLY (var) = 1;
> +      DECL_INITIAL (var)
> +     = build_int_cst (unsigned_type_node,
> +                      ((unsigned int) omp_requires_mask
> +                       & (OMP_REQUIRES_UNIFIED_ADDRESS
> +                          | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
> +                          | OMP_REQUIRES_REVERSE_OFFLOAD)));
> +      declare_weak (var);
> +      varpool_node::finalize_decl (var);
> +    }
> +#endif

I think this __offload_requires_mask can't work reliably, not only because
it is a single var per process while one can have target regions in
multiple shared libraries (I know we've discussed that it doesn't always
work reliably, but we shouldn't hardcode something that will prevent it from
working properly altogether), but also because one can e.g. use symbol
versioning or simple linker script and __offload_requires_mask won't be
visible to libgomp.

Can't we arrange for GOMP_offload_register_ver to see the bitmasks somewhere
instead (and force GOMP_offload_register_ver even if there are no vars or
funcs and just the requires mask)?

GOMP_offload_register_ver passes a version number, perhaps we could bump
GOMP_VERSION from 1 to 2 and store the bitmask somewhere in the target data
and on the libgomp side handle both GOMP_VERSION_LIB (version) 1 and 2,
the former by pretending there is 0 bitmask?

There can be various ways how to get the bitmask into
config/*/*mkoffload.cc so that it can add it there.

Could be the weak __offload_requires_mask (but we probably e.g. can't assume
declare_weak will work), which we'd make also hidden and the *mkoffload.cc
generated source would refer to its address (but have it declared hidden so
that if it isn't present in current TU, we get NULL).  Disadvantage is the
relocation.

Or we could ask for the offloading lto1 when it merges those from different
offloadng TUs to emit some magic symbol in what it emits and have mkoffload
search for it.

Or mkoffload could pass some option to the offloading lto compilation, say
filename of a temporary file, let lto1 save into that file the bitmask and
let mkoffload read it.  Or env var.

> --- a/libgomp/libgomp-plugin.h
> +++ b/libgomp/libgomp-plugin.h
> @@ -125,7 +125,7 @@ extern void GOMP_PLUGIN_fatal (const char *, ...)
>  extern const char *GOMP_OFFLOAD_get_name (void);
>  extern unsigned int GOMP_OFFLOAD_get_caps (void);
>  extern int GOMP_OFFLOAD_get_type (void);
> -extern int GOMP_OFFLOAD_get_num_devices (void);
> +extern int GOMP_OFFLOAD_get_num_devices (unsigned int);

This is an ABI change for plugins, don't we want to e.g. rename
the symbol as well, so that plugin mismatches are caught more easily?

        Jakub

Reply via email to