Hi!

On Mon, 28 Sep 2015 15:38:57 -0400, Nathan Sidwell <nat...@acm.org> wrote:
> On 09/24/15 04:40, Jakub Jelinek wrote:
> > Iff GCC 5 compiled offloaded OpenACC/PTX code will always do host fallback
> > anyway because of the incompatible PTX version

I do agree that it's reasonable to require users to re-compile their code
when switching between major GCC releases, to retain the offloading
feature, or otherwise resort to host fallback execution.  I'll propose
some text along these lines for the GCC 6 release notes.

> > why don't you just
> > do
> >    goacc_save_and_set_bind (acc_device_host);
> >    fn (hostaddrs);
> >    goacc_restore_bind ();
> 
> Committed the  attached.  Thanks for the review.

What we now got, doesn't work, for several reasons.  GCC 5 OpenACC
offloading executables will just run into SIGSEGV.  Here is a patch
(which depends on
<http://news.gmane.org/find-root.php?message_id=%3C87a8ko3ea0.fsf%40hertz.schwinge.homeip.net%3E>).
Unfortunately, we have to jump through some hoops: because GCC 5
compiler-generated OpenACC reductions code emits calls to
acc_get_device_type, and because we'll (have to) always resort to host
fallback execution for GCC 5 executables, we also have to enforce these
acc_get_device_type calls to return acc_device_host; otherwise reductions
will give bogus results.  (I hope I'm correctly implementing/using the
symbol versioning "magic".)  OK for gcc-6-branch and trunk?  Assuming we
want this fixed on gcc-6-branch, should it be part of 6.1 (to avoid 6.1
users running into the SIGSEGV), or delay for 6.2?

We don't have an easy way to add test cases to make sure we don't break
such legacy interfaces, do we?  (So, I just manually checked a few test
cases.)

commit c68c6b8e79176f5dc21684efe2517cbfb83a182e
Author: Thomas Schwinge <tho...@codesourcery.com>
Date:   Wed Apr 20 13:08:57 2016 +0200

    libgomp: Make GCC 5 OpenACC offloading executables work
    
        * libgomp.h: Include "openacc.h".
        (goacc_get_device_type_201, goacc_get_device_type_20): New
        prototypes.
        (oacc_20_201_symver, goacc_get_device_type_201): New macros.
        * libgomp.map: Add acc_get_device_type with OACC_2.0.1 symbol
        version.
        * oacc-init.c (acc_get_device_type): Rename to
        goacc_get_device_type_201.
        (goacc_get_device_type_20): New function.
        * oacc-parallel.c (GOACC_parallel): Call goacc_lazy_initialize.
        * plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Refuse version
        0 offload images.
        * target.c (gomp_load_image_to_device): Gracefully handle the case
        that a plugin refuses to load offload images.
---
 libgomp/libgomp.h             | 10 ++++++++++
 libgomp/libgomp.map           | 10 ++++++++++
 libgomp/oacc-init.c           | 18 +++++++++++++++++-
 libgomp/oacc-parallel.c       | 11 +++++++++++
 libgomp/plugin/plugin-nvptx.c | 10 +++++++++-
 libgomp/target.c              |  6 +++++-
 6 files changed, 62 insertions(+), 3 deletions(-)

diff --git libgomp/libgomp.h libgomp/libgomp.h
index 6a05bbc..9fa1cb1 100644
--- libgomp/libgomp.h
+++ libgomp/libgomp.h
@@ -1011,6 +1011,8 @@ gomp_work_share_init_done (void)
 /* Now that we're back to default visibility, include the globals.  */
 #include "libgomp_g.h"
 
+#include "openacc.h"
+
 /* Include omp.h by parts.  */
 #include "omp-lock.h"
 #define _LIBGOMP_OMP_LOCK_DEFINED 1
@@ -1047,11 +1049,17 @@ extern void gomp_set_nest_lock_25 (omp_nest_lock_25_t 
*) __GOMP_NOTHROW;
 extern void gomp_unset_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW;
 extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) __GOMP_NOTHROW;
 
+extern acc_device_t goacc_get_device_type_201 (void) __GOACC_NOTHROW;
+extern acc_device_t goacc_get_device_type_20 (void) __GOACC_NOTHROW;
+
 # define strong_alias(fn, al) \
   extern __typeof (fn) al __attribute__ ((alias (#fn)));
 # define omp_lock_symver(fn) \
   __asm (".symver g" #fn "_30, " #fn "@@OMP_3.0"); \
   __asm (".symver g" #fn "_25, " #fn "@OMP_1.0");
+# define oacc_20_201_symver(fn) \
+  __asm (".symver go" #fn "_201, " #fn "@@OACC_2.0.1"); \
+  __asm (".symver go" #fn "_20, " #fn "@OACC_2.0");
 #else
 # define gomp_init_lock_30 omp_init_lock
 # define gomp_destroy_lock_30 omp_destroy_lock
@@ -1063,6 +1071,8 @@ extern int gomp_test_nest_lock_25 (omp_nest_lock_25_t *) 
__GOMP_NOTHROW;
 # define gomp_set_nest_lock_30 omp_set_nest_lock
 # define gomp_unset_nest_lock_30 omp_unset_nest_lock
 # define gomp_test_nest_lock_30 omp_test_nest_lock
+
+# define goacc_get_device_type_201 acc_get_device_type
 #endif
 
 #ifdef HAVE_ATTRIBUTE_VISIBILITY
diff --git libgomp/libgomp.map libgomp/libgomp.map
index 4d42c42..4803aab 100644
--- libgomp/libgomp.map
+++ libgomp/libgomp.map
@@ -304,7 +304,12 @@ OACC_2.0 {
        acc_get_num_devices_h_;
        acc_set_device_type;
        acc_set_device_type_h_;
+#ifdef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT
+       # If the assembler used lacks the .symver directive or the linker
+       # doesn't support GNU symbol versioning, we have the same symbol in
+       # two versions, which Sun ld chokes on.
        acc_get_device_type;
+#endif
        acc_get_device_type_h_;
        acc_set_device_num;
        acc_set_device_num_h_;
@@ -378,6 +383,11 @@ OACC_2.0 {
        acc_set_cuda_stream;
 };
 
+OACC_2.0.1 {
+  global:
+       acc_get_device_type;
+} OACC_2.0;
+
 GOACC_2.0 {
   global:
        GOACC_data_end;
diff --git libgomp/oacc-init.c libgomp/oacc-init.c
index 42d005d..a7a2243 100644
--- libgomp/oacc-init.c
+++ libgomp/oacc-init.c
@@ -528,7 +528,7 @@ acc_set_device_type (acc_device_t d)
 ialias (acc_set_device_type)
 
 acc_device_t
-acc_get_device_type (void)
+goacc_get_device_type_201 (void)
 {
   acc_device_t res = acc_device_none;
   struct gomp_device_descr *dev;
@@ -552,8 +552,24 @@ acc_get_device_type (void)
   return res;
 }
 
+#ifdef LIBGOMP_GNU_SYMBOL_VERSIONING
+
+/* Legacy entry point (GCC 5).  Only provide host fallback execution.  */
+
+acc_device_t
+goacc_get_device_type_20 (void)
+{
+  return acc_device_host;
+}
+
+oacc_20_201_symver (acc_get_device_type)
+
+#else /* LIBGOMP_GNU_SYMBOL_VERSIONING */
+
 ialias (acc_get_device_type)
 
+#endif /* LIBGOMP_GNU_SYMBOL_VERSIONING */
+
 int
 acc_get_device_num (acc_device_t d)
 {
diff --git libgomp/oacc-parallel.c libgomp/oacc-parallel.c
index 9fe5020..321fd66 100644
--- libgomp/oacc-parallel.c
+++ libgomp/oacc-parallel.c
@@ -203,6 +203,17 @@ GOACC_parallel (int device, void (*fn) (void *),
                int num_gangs, int num_workers, int vector_length,
                int async, int num_waits, ...)
 {
+#ifdef HAVE_INTTYPES_H
+  gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, sizes=%p, kinds=%p, "
+                "async = %d\n",
+             __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
+#else
+  gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, 
async=%d\n",
+             __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
+             async);
+#endif
+  goacc_lazy_initialize ();
+
   goacc_save_and_set_bind (acc_device_host);
   fn (hostaddrs);
   goacc_restore_bind ();
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index fc5f298..56e6fae 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -1537,7 +1537,15 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, 
const void *target_data,
     GOMP_PLUGIN_fatal ("Offload data incompatible with PTX plugin"
                       " (expected %u, received %u)",
                       GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version));
-  
+  if (GOMP_VERSION_DEV (version) == 0)
+    {
+      /* We're no longer support offload data generated by version 0 mkoffload;
+        it won't be used in the legacy GOMP_parallel entry point.  */
+      GOMP_PLUGIN_debug (0, "Offload data not loaded (version %u)\n",
+                        GOMP_VERSION_DEV (version));
+      return -1;
+    }
+
   GOMP_OFFLOAD_init_device (ord);
 
   dev = ptx_devices[ord];
diff --git libgomp/target.c libgomp/target.c
index dd6f74d..2fbfa6e 100644
--- libgomp/target.c
+++ libgomp/target.c
@@ -1008,7 +1008,11 @@ gomp_load_image_to_device (struct gomp_device_descr 
*devicep, unsigned version,
   num_target_entries
     = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);
-
+  if (num_target_entries < 0)
+    {
+      /* The plugin refused this offload data.  */
+      return;
+    }
   if (num_target_entries != num_funcs + num_vars)
     {
       gomp_mutex_unlock (&devicep->lock);


Grüße
 Thomas

Attachment: signature.asc
Description: PGP signature

Reply via email to