https://gcc.gnu.org/g:63caf6bc2f914518dbfcd242164f5e990982bdf9

commit 63caf6bc2f914518dbfcd242164f5e990982bdf9
Author: Thomas Schwinge <tho...@codesourcery.com>
Date:   Fri Jun 21 10:40:38 2019 -0700

    Add changes to profiling interface from OG8 branch
    
    This bundles up the parts of the profiling code from the OG8 branch that 
were
    not included in the upstream patch.
    
    libgomp/ChangeLog
            * Makefile.am (libgomp_la_SOURCES): Add
            oacc-profiling-acc_register_library.c.
            * Makefile.in: Regenerate.
            * libgomp.texi: Remove paragraph about acc_register_library.
            * oacc-init.c (get_property_any): Add profiling code.
            * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api 
for
            profiling.
            * oacc-profiling-acc_register_library.c: New file.
            * oacc-profiling.c (goacc_profiling_initialize): Call
            acc_register_library.  Avoid duplicate registration.
            (acc_register_library): Remove.
            * config/nvptx/oacc-profiling-acc_register_library.c:
            New empty file.
            * config/nvptx/oacc-profiling.c: Likewise.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove
            call to acc_register_library.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: 
Likewise.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: 
Likewise.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
            Likewise.
            * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: 
Likewise.
    
    Co-Authored-By: Maciej W. Rozycki  <ma...@codesourcery.com>

Diff:
---
 libgomp/Makefile.am                                |  2 +-
 libgomp/Makefile.in                                |  7 ++--
 .../nvptx/oacc-profiling-acc_register_library.c    |  0
 libgomp/config/nvptx/oacc-profiling.c              |  0
 libgomp/libgomp.texi                               |  8 -----
 libgomp/oacc-init.c                                | 21 +++++++++++-
 libgomp/oacc-parallel.c                            |  2 ++
 libgomp/oacc-profiling-acc_register_library.c      | 39 ++++++++++++++++++++++
 libgomp/oacc-profiling.c                           | 32 +++++++++++-------
 .../acc_prof-dispatch-1.c                          |  2 --
 .../libgomp.oacc-c-c++-common/acc_prof-init-1.c    |  2 --
 .../libgomp.oacc-c-c++-common/acc_prof-kernels-1.c | 19 +++++++----
 .../acc_prof-parallel-1.c                          |  2 --
 .../acc_prof-valid_bytes-1.c                       |  2 --
 .../libgomp.oacc-c-c++-common/acc_prof-version-1.c |  2 --
 15 files changed, 100 insertions(+), 40 deletions(-)

diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index e3202aeb0e05..a2e531b31496 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c error.c \
        target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
        oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
        priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-       oacc-target.c target-indirect.c
+       oacc-target.c target-indirect.c oacc-profiling-acc_register_library.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 2a0a842af52d..b4a65a756e7b 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo 
critical.lo \
        oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
        oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
        affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
-       oacc-target.lo target-indirect.lo $(am__objects_1)
+       oacc-target.lo oacc-profiling-acc_register_library.lo \
+       target-indirect.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c \
        oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
        oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
        affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-       oacc-target.c target-indirect.c $(am__append_3)
+       oacc-target.c oacc-profiling-acc_register_library.c \
+       target-indirect.c $(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info 
$(libtool_VERSION)
@@ -768,6 +770,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
diff --git a/libgomp/config/nvptx/oacc-profiling-acc_register_library.c 
b/libgomp/config/nvptx/oacc-profiling-acc_register_library.c
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/libgomp/config/nvptx/oacc-profiling.c 
b/libgomp/config/nvptx/oacc-profiling.c
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6909c2b16f88..d7d42d017582 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6377,14 +6377,6 @@ We just handle one case specially, as required by CUDA 
9.0
 @code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
 callbacks.
 
-We're not yet implementing initialization via a
-@code{acc_register_library} function that is either statically linked
-in, or dynamically via @env{LD_PRELOAD}.
-Initialization via @code{acc_register_library} functions dynamically
-loaded via the @env{ACC_PROFLIB} environment variable does work, as
-does directly calling @code{acc_prof_register},
-@code{acc_prof_unregister}, @code{acc_prof_lookup}.
-
 As currently there are no inquiry functions defined, calls to
 @code{acc_prof_lookup} always returns @code{NULL}.
 
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 3856f85a723d..5fb1bb887b6c 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -810,6 +810,16 @@ get_property_any (int ord, acc_device_t d, 
acc_device_property_t prop)
   if (d == acc_device_current && thr && thr->dev)
     return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
 
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+  if (profiling_p)
+    {
+      prof_info.device_type = d;
+      prof_info.device_number = ord;
+    }
+
   gomp_mutex_lock (&acc_device_lock);
 
   struct gomp_device_descr *dev = resolve_device (d, true);
@@ -830,7 +840,16 @@ get_property_any (int ord, acc_device_t d, 
acc_device_property_t prop)
 
   assert (dev);
 
-  return dev->openacc.get_property_func (dev->target_id, prop);
+  union goacc_property_value propval =
+      dev->openacc.get_property_func (dev->target_id, prop);
+
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+
+  return propval;
 }
 
 size_t
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index a873830b910e..a1fb11b738a2 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -367,6 +367,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       fn (hostaddrs);
       goto out_prof;
     }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
 
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
diff --git a/libgomp/oacc-profiling-acc_register_library.c 
b/libgomp/oacc-profiling-acc_register_library.c
new file mode 100644
index 000000000000..f6b482b51f4b
--- /dev/null
+++ b/libgomp/oacc-profiling-acc_register_library.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file provides an stub acc_register_library function.  It's in a
+   separate file so that this function can easily be overridden when linking
+   statically.  */
+
+#include "libgomp.h"
+#include "acc_prof.h"
+
+void
+acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
+                     acc_prof_lookup_func lookup)
+{
+  gomp_debug (0, "dummy %s\n", __FUNCTION__);
+}
diff --git a/libgomp/oacc-profiling.c b/libgomp/oacc-profiling.c
index f98cc0a547e9..d6cc9ce956d0 100644
--- a/libgomp/oacc-profiling.c
+++ b/libgomp/oacc-profiling.c
@@ -104,7 +104,12 @@ goacc_profiling_initialize (void)
   for (int i = 0; i < acc_ev_last; ++i)
     goacc_prof_callbacks_enabled[i] = true;
 
-
+  /* We are to invoke an external acc_register_library routine, defaulting to
+     our stub oacc-profiling-acc_register_library.c:acc_register_library
+     implementation.  */
+  gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__);
+  //TODO.
+  acc_register_library (acc_prof_register, acc_prof_unregister, NULL);
 #ifdef PLUGIN_SUPPORT
   char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
   while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
@@ -141,10 +146,20 @@ goacc_profiling_initialize (void)
                = dlsym (dl_handle, "acc_register_library");
              if (a_r_l == NULL)
                goto dl_fail;
-             gomp_debug (0, "  %s: calling %s:acc_register_library\n",
-                         __FUNCTION__, acc_proflib);
-             a_r_l (acc_prof_register, acc_prof_unregister,
-                    acc_prof_lookup);
+             /* Avoid duplicate registration, for example if the same shared
+                library is specified in LD_PRELOAD and ACC_PROFLIB -- which
+                TAU 2.26 does when using "tau_exec -openacc".  */
+             if (a_r_l != acc_register_library)
+               {
+                 gomp_debug (0, "  %s: calling %s:acc_register_library\n",
+                             __FUNCTION__, acc_proflib);
+                 //TODO.
+                 a_r_l (acc_prof_register, acc_prof_unregister, NULL);
+               }
+             else
+               gomp_debug (0, "  %s: skipping duplicate"
+                           " %s:acc_register_library\n",
+                           __FUNCTION__, acc_proflib);
            }
          else
            {
@@ -487,13 +502,6 @@ acc_prof_lookup (const char *name)
   return NULL;
 }
 
-void
-acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
-                     acc_prof_lookup_func lookup)
-{
-  gomp_fatal ("TODO");
-}
-
 /* Prepare to dispatch events?  */
 
 bool
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
index d929bfd80a4f..a9a8c74150c9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
@@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg 
unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, 
acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
   reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
index b5e771554601..91b373216c93 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
@@ -270,8 +270,6 @@ static void cb_compute_construct_end (acc_prof_info 
*prof_info, acc_event_info *
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, 
acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
   reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
index 2c8539714740..2cd2c98ddf1c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
@@ -59,6 +59,7 @@ static int state = -1;
 static acc_device_t acc_device_type;
 static int acc_device_num;
 static int num_gangs, num_workers, vector_length;
+static int async;
 
 
 static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info 
*event_info, acc_api_info *api_info)
@@ -76,7 +77,7 @@ static void cb_enqueue_launch_start (acc_prof_info 
*prof_info, acc_event_info *e
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == acc_async_noval);
+  assert (prof_info->async == async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
@@ -166,8 +167,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg 
unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, 
acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
   assert (state == 0);
@@ -176,8 +175,10 @@ int main()
   acc_device_num = acc_get_device_num (acc_device_type);
   assert (state == 0);
 
-  /* Parallelism dimensions: compiler/runtime decides.  */
   STATE_OP (state, = 0);
+  /* Implicit async.  */
+  async = acc_async_noval;
+  /* Parallelism dimensions: compiler/runtime decides.  */
   num_gangs = num_workers = vector_length = 0;
   {
 #define N 100
@@ -203,8 +204,10 @@ int main()
 #undef N
   }
 
-  /* Parallelism dimensions: literal.  */
   STATE_OP (state, = 0);
+  /* Explicit async: without argument.  */
+  async = acc_async_noval;
+  /* Parallelism dimensions: literal.  */
   num_gangs = 30;
   num_workers = 3;
   vector_length = 5;
@@ -212,6 +215,7 @@ int main()
 #define N 100
     int x[N];
 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
+  async \
   num_gangs (30) num_workers (3) vector_length (5)
     /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in 
block requested to be made addressable} {} { target *-*-* } l_compute$c_compute 
}
        { dg-note {variable 'i' made addressable} {} { target *-*-* } 
l_compute$c_compute } */
@@ -234,8 +238,10 @@ int main()
 #undef N
   }
 
-  /* Parallelism dimensions: variable.  */
   STATE_OP (state, = 0);
+  /* Explicit async: variable.  */
+  async = 123;
+  /* Parallelism dimensions: variable.  */
   num_gangs = 22;
   num_workers = 5;
   vector_length = 7;
@@ -243,6 +249,7 @@ int main()
 #define N 100
     int x[N];
 #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
+  async (async) \
   num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
     /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in 
block requested to be made addressable} {} { target *-*-* } l_compute$c_compute 
}
        { dg-note {variable 'i' made addressable} {} { target *-*-* } 
l_compute$c_compute } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
index 9b4493ddb7f6..27f86d3160f8 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -830,8 +830,6 @@ static void cb_enqueue_launch_end (acc_prof_info 
*prof_info, acc_event_info *eve
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, 
acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
   reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
index 5b58c51d4c42..a723ad97b933 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
@@ -143,8 +143,6 @@ typedef struct E
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, 
acc_prof_lookup);
-
   A A1;
   DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A);
   assert (VALID_BYTES_A <= sizeof A1);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c
index f53786871671..0f9e9562bc61 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c
@@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg 
unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, 
acc_prof_lookup);
-
   ev_count = 0;
 
   /* Trigger tests done in 'cb_*' functions.  */

Reply via email to