On Fri, Oct 29, 2021 at 05:54:57PM +0200, Tobias Burnus wrote:
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -3926,8 +3926,9 @@ omp_runtime_api_call (const_tree fndecl)
>
>    static const char *omp_runtime_apis[] =
>      {
> -      /* This array has 3 sections. First omp_* calls that don't
> -         have any suffixes. */
> +      /* This array has 2 sections. First omp_* calls that don't
> +         have any suffixes in the DECL_NAME; this includes omp_*
> +         but also the omp_*_ of libgomp/fortran.c. */
>        "aligned_alloc",
>        "aligned_calloc",
>        "alloc",
> @@ -3941,8 +3942,6 @@ omp_runtime_api_call (const_tree fndecl)
>        "target_is_present",
>        "target_memcpy",
>        "target_memcpy_rect",
> -      NULL,
> -      /* Now omp_* calls that are available as omp_* and omp_*_. */
>        "capture_affinity",
>        "destroy_allocator",
>        "destroy_lock",
If we use just 2 sections, then the two sections should be merged (they
were in alphabetical order within each section).  Or we can keep 3 sections
and say that the first one is for the calls that have no suffix on the
library side, and the second is for those available both without and with
the _ suffix, noting that in the DECL_NAME those don't make a difference.
Or make it 3 sections, but with the first two separated not by NULL but
just by a comment, i.e. what you have in the patch except that the
comments would be adjusted...  Either of those 3-section solutions would
be more useful if we ever reconsider this and go with DECL_ASSEMBLER_NAME.

> @@ -3994,7 +3993,8 @@ omp_runtime_api_call (const_tree fndecl)
>        "unset_lock",
>        "unset_nest_lock",
>        NULL,
> -      /* And finally calls available as omp_*, omp_*_ and omp_*_8_. */
> +      /* Calls available with DECL_NAME omp_* and omp_*_8, the latter matches
> +         omp_*_8_ in libgomp/fortran.c. */
>        "display_env",
>        "get_ancestor_thread_num",
>        "init_allocator",
> @@ -4024,11 +4024,7 @@ omp_runtime_api_call (const_tree fndecl)
>        size_t len = strlen (omp_runtime_apis[i]);
>        if (strncmp (name + 4, omp_runtime_apis[i], len) == 0
>            && (name[4 + len] == '\0'
> -              || (mode > 0
> -                  && name[4 + len] == '_'
> -                  && (name[4 + len + 1] == '\0'
> -                      || (mode > 1
> -                          && strcmp (name + 4 + len + 1, "8_") == 0)))))
> +              || (mode && strcmp (name + 4 + len, "_8") == 0)))
>          return true;
>      }
>    return false;
> @@ -4095,9 +4091,24 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
>                              "OpenMP runtime API call %qD in a region with "
>                              "%<order(concurrent)%> clause", fndecl);
>          }
> +      if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS
> +          && omp_runtime_api_call (fndecl)
> +          && strncmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)),
> +                      "omp_get_num_teams",
> +                      strlen ("omp_get_num_teams")) != 0
> +          && strncmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)),
> +                      "omp_get_team_num",
> +                      strlen ("omp_get_team_num")) != 0)

If we wanted to optimize, we could decide based on IDENTIFIER_LENGTH
whether to use strncmp at all and which one.  Your choice.

> +  #pragma omp distribute
> +  for (int i = 0; i < 1; ++i)
> +    if (omp_in_parallel ()
> +        || omp_get_level () != 0
> +        || omp_get_ancestor_thread_num (0) != 0
> +        || omp_get_ancestor_thread_num (1) != -1)
> +      abort ();

One thing I've missed: with such an omp distribute we unfortunately test
it only on one of the teams (probably the first one) rather than on all
of them.  Can't we use instead
  #pragma omp distribute dist_schedule(static,1)
  for (int i = 0; i < omp_get_num_teams (); ++i)
which I believe should ensure that each team will execute exactly one
iteration (i.e. exactly what the code has been doing before)?

Otherwise LGTM.

	Jakub
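
P.S. In case it is useful, a rough (and completely untested) sketch of what
I mean by deciding based on IDENTIFIER_LENGTH first; the exact shape and the
teams_api temporary are just for illustration:

      if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS
          && omp_runtime_api_call (fndecl))
        {
          tree id = DECL_NAME (fndecl);
          /* "omp_get_team_num" is 16 chars and "omp_get_num_teams" is 17,
             so for shorter identifiers neither strncmp can match and both
             calls can be skipped.  */
          bool teams_api
            = (IDENTIFIER_LENGTH (id) >= strlen ("omp_get_team_num")
               && (strncmp (IDENTIFIER_POINTER (id), "omp_get_team_num",
                            strlen ("omp_get_team_num")) == 0
                   || (IDENTIFIER_LENGTH (id) >= strlen ("omp_get_num_teams")
                       && strncmp (IDENTIFIER_POINTER (id),
                                   "omp_get_num_teams",
                                   strlen ("omp_get_num_teams")) == 0)));
          if (!teams_api)
            /* ... warn as in the patch ... */;
        }

And the testcase loop I have in mind above would then simply be

  #pragma omp distribute dist_schedule(static,1)
  for (int i = 0; i < omp_get_num_teams (); ++i)
    if (omp_in_parallel ()
        || omp_get_level () != 0
        || omp_get_ancestor_thread_num (0) != 0
        || omp_get_ancestor_thread_num (1) != -1)
      abort ();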