On Wed, Jun 29, 2022 at 04:33:02PM +0200, Tobias Burnus wrote: > --- a/gcc/c/c-parser.cc > +++ b/gcc/c/c-parser.cc > @@ -2488,6 +2488,12 @@ c_parser_declaration_or_fndef (c_parser *parser, bool > fndef_ok, > break; > } > > + if (flag_openmp > + && lookup_attribute ("omp declare target", > + DECL_ATTRIBUTES (current_function_decl))) > + omp_requires_mask > + = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED); > + > if (DECL_DECLARED_INLINE_P (current_function_decl)) > tv = TV_PARSE_INLINE; > else
I thought the above would be left out, at least until clarified what exactly we mean with device routines in the restrictions. > --- a/gcc/cp/parser.cc > +++ b/gcc/cp/parser.cc > @@ -15389,6 +15389,11 @@ cp_parser_simple_declaration (cp_parser* parser, > /* Otherwise, we're done with the list of declarators. */ > else > { > + if (flag_openmp && lookup_attribute ("omp declare target", > + DECL_ATTRIBUTES (decl))) > + omp_requires_mask > + = (enum omp_requires) (omp_requires_mask > + | OMP_REQUIRES_TARGET_USED); > pop_deferring_access_checks (); > cp_finalize_omp_declare_simd (parser, &odsd); > return; And the above too. On the Fortran side I don't see it being done. > --- a/gcc/lto-cgraph.cc > +++ b/gcc/lto-cgraph.cc > @@ -1068,12 +1069,28 @@ read_string (class lto_input_block *ib) > void > output_offload_tables (void) > { > - if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)) > + bool output_requires = (flag_openmp > + && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != > 0); > + if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars) > + && !output_requires) > return; > > struct lto_simple_output_block *ob > = lto_create_simple_output_block (LTO_section_offload_table); > > + if (output_requires) > + { > + HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask > + & (OMP_REQUIRES_UNIFIED_ADDRESS > + | OMP_REQUIRES_UNIFIED_SHARED_MEMORY > + | OMP_REQUIRES_REVERSE_OFFLOAD > + | OMP_REQUIRES_TARGET_USED)); Why is the OMP_REQUIRES_TARGET_USED bit saved there? It is always set if output_requires... If we want to make sure it is set in omp_requires_mask after streaming in, we can just or it in back there. > @@ -1811,6 +1844,24 @@ input_offload_tables (bool do_force_output) > if (do_force_output) > varpool_node::get (var_decl)->force_output = 1; > } > + else if (tag == LTO_symtab_edge) > + { > + static bool error_emitted = false; > + HOST_WIDE_INT val = streamer_read_hwi (ib); > + > + if (omp_requires_mask == 0) > + omp_requires_mask = (omp_requires) val; I mean here: (omp_requires) (val | OMP_REQUIRES_TARGET_USED); > + else if (omp_requires_mask != val && !error_emitted) > + { > + char buf[64], buf2[64]; > + omp_requires_to_name (buf, sizeof (buf), omp_requires_mask); > + omp_requires_to_name (buf2, sizeof (buf2), val); > + error ("OpenMP %<requires%> directive with non-identical " > + "clauses in multiple compilation units: %qs vs. %qs", > + buf, buf2); I think the user should be told also where, so file_data->file_name and saved another filename from the if (omp_requires_mask == 0) body. I admit I haven't investigated whether it would be enough to just record the const char * file_data->file_name or whether what it points to might be freed before we report it. > + error_emitted = true; > --- a/gcc/omp-offload.cc > +++ b/gcc/omp-offload.cc > @@ -398,6 +399,26 @@ omp_finish_file (void) > unsigned num_funcs = vec_safe_length (offload_funcs); > unsigned num_vars = vec_safe_length (offload_vars); > > +#ifndef ACCEL_COMPILER > + if (flag_openmp && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 0) > + { > + tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, > + get_identifier ("__offload_requires_mask"), > + unsigned_type_node); > + TREE_PUBLIC (var) = 1; > + TREE_STATIC (var) = 1; > + TREE_READONLY (var) = 1; > + DECL_INITIAL (var) > + = build_int_cst (unsigned_type_node, > + ((unsigned int) omp_requires_mask > + & (OMP_REQUIRES_UNIFIED_ADDRESS > + | OMP_REQUIRES_UNIFIED_SHARED_MEMORY > + | OMP_REQUIRES_REVERSE_OFFLOAD))); > + declare_weak (var); > + varpool_node::finalize_decl (var); > + } > +#endif I think this __offload_requires_mask can't work reliably, not only because it is a single var per process while one can have target regions in multiple shared libraries (I know we've discussed that it doesn't always work reliably, but we shouldn't hardcode something that will prevent it from working properly altogether), but also because one can e.g. use symbol versioning or simple linker script and __offload_requires_mask won't be visible to libgomp. Can't we arrange for GOMP_offload_register_ver to see the bitmasks somewhere instead (and force GOMP_offload_register_ver even if there are no vars or funcs and just the requires mask)? GOMP_offload_register_ver passes a version number, perhaps we could bump GOMP_VERSION from 1 to 2 and store the bitmask somewhere in the target data and on the libgomp side handle both GOMP_VERSION_LIB (version) 1 and 2, the former by pretending there is 0 bitmask? There can be various ways how to get the bitmask into config/*/*mkoffload.cc so that it can add it there. Could be the weak __offload_requires_mask (but we probably e.g. can't assume declare_weak will work), which we'd make also hidden and the *mkoffload.cc generated source would refer to its address (but have it declared hidden so that if it isn't present in current TU, we get NULL). Disadvantage is the relocation. Or we could ask for the offloading lto1 when it merges those from different offloadng TUs to emit some magic symbol in what it emits and have mkoffload search for it. Or mkoffload could pass some option to the offloading lto compilation, say filename of a temporary file, let lto1 save into that file the bitmask and let mkoffload read it. Or env var. > --- a/libgomp/libgomp-plugin.h > +++ b/libgomp/libgomp-plugin.h > @@ -125,7 +125,7 @@ extern void GOMP_PLUGIN_fatal (const char *, ...) > extern const char *GOMP_OFFLOAD_get_name (void); > extern unsigned int GOMP_OFFLOAD_get_caps (void); > extern int GOMP_OFFLOAD_get_type (void); > -extern int GOMP_OFFLOAD_get_num_devices (void); > +extern int GOMP_OFFLOAD_get_num_devices (unsigned int); This is an ABI change for plugins, don't we want to e.g. rename the symbol as well, so that plugin mismatches are caught more easily? Jakub