'__dso_handle' for '__cxa_atexit', '__cxa_finalize'.  See
<https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.

        PR target/119853
        PR target/119854
        libgcc/
        * config/gcn/crt0.c (_fini_array): Call
        '__GCC_offload___cxa_finalize'.
        * config/nvptx/gbl-ctors.c (__static_do_global_dtors): Likewise.
        libgomp/
        * target-cxa-dso-dtor.c: New.
        * config/accel/target-cxa-dso-dtor.c: Likewise.
        * Makefile.am (libgomp_la_SOURCES): Add it.
        * Makefile.in: Regenerate.
        * testsuite/libgomp.c++/target-cdtor-1.C: New.
        * testsuite/libgomp.c++/target-cdtor-2.C: Likewise.
---
 libgcc/config/gcn/crt0.c                      |  32 ++++
 libgcc/config/nvptx/gbl-ctors.c               |  16 ++
 libgomp/Makefile.am                           |   2 +-
 libgomp/Makefile.in                           |   7 +-
 libgomp/config/accel/target-cxa-dso-dtor.c    |  62 ++++++++
 libgomp/target-cxa-dso-dtor.c                 |   3 +
 .../testsuite/libgomp.c++/target-cdtor-1.C    | 104 +++++++++++++
 .../testsuite/libgomp.c++/target-cdtor-2.C    | 138 ++++++++++++++++++
 8 files changed, 361 insertions(+), 3 deletions(-)
 create mode 100644 libgomp/config/accel/target-cxa-dso-dtor.c
 create mode 100644 libgomp/target-cxa-dso-dtor.c
 create mode 100644 libgomp/testsuite/libgomp.c++/target-cdtor-1.C
 create mode 100644 libgomp/testsuite/libgomp.c++/target-cdtor-2.C

diff --git a/libgcc/config/gcn/crt0.c b/libgcc/config/gcn/crt0.c
index dbd6749a47f..cc23e214cf9 100644
--- a/libgcc/config/gcn/crt0.c
+++ b/libgcc/config/gcn/crt0.c
@@ -24,6 +24,28 @@ typedef long long size_t;
 /* Provide an entry point symbol to silence a linker warning.  */
 void _start() {}
 
+
+#define PR119369_fixed 0
+
+
+/* Host/device compatibility: '__cxa_finalize'.  Dummy; if necessary,
+   overridden via libgomp 'target-cxa-dso-dtor.c'.  */
+
+#if PR119369_fixed
+extern void __GCC_offload___cxa_finalize (void *) __attribute__((weak));
+#else
+void __GCC_offload___cxa_finalize (void *) __attribute__((weak));
+
+void __attribute__((weak))
+__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused)))
+{
+}
+#endif
+
+/* There are no DSOs; this is the main program.  */
+static void * const __dso_handle = 0;
+
+
 #ifdef USE_NEWLIB_INITFINI
 
 extern void __libc_init_array (void) __attribute__((weak));
@@ -38,6 +60,11 @@ void _init_array()
 __attribute__((amdgpu_hsa_kernel ()))
 void _fini_array()
 {
+#if PR119369_fixed
+  if (__GCC_offload___cxa_finalize)
+#endif
+    __GCC_offload___cxa_finalize (__dso_handle);
+
   __libc_fini_array ();
 }
 
@@ -70,6 +97,11 @@ void _init_array()
 __attribute__((amdgpu_hsa_kernel ()))
 void _fini_array()
 {
+#if PR119369_fixed
+  if (__GCC_offload___cxa_finalize)
+#endif
+    __GCC_offload___cxa_finalize (__dso_handle);
+
   size_t count;
   size_t i;
 
diff --git a/libgcc/config/nvptx/gbl-ctors.c b/libgcc/config/nvptx/gbl-ctors.c
index 26268116ee0..10954ee3ab6 100644
--- a/libgcc/config/nvptx/gbl-ctors.c
+++ b/libgcc/config/nvptx/gbl-ctors.c
@@ -31,6 +31,20 @@
 extern int atexit (void (*function) (void));
 
 
+/* Host/device compatibility: '__cxa_finalize'.  Dummy; if necessary,
+   overridden via libgomp 'target-cxa-dso-dtor.c'.  */
+
+extern void __GCC_offload___cxa_finalize (void *);
+
+void __attribute__((weak))
+__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused)))
+{
+}
+
+/* There are no DSOs; this is the main program.  */
+static void * const __dso_handle = 0;
+
+
 /* Handler functions ('static', in contrast to the 'gbl-ctors.h'
    prototypes).  */
 
@@ -49,6 +63,8 @@ static void __static_do_global_dtors (void);
 static void
 __static_do_global_dtors (void)
 {
+  __GCC_offload___cxa_finalize (__dso_handle);
+
   func_ptr *p = __DTOR_LIST__;
   ++p;
   for (; *p; ++p)
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index e3202aeb0e0..19479aea462 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c error.c \
        target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
        oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
        priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-       oacc-target.c target-indirect.c
+       oacc-target.c target-indirect.c target-cxa-dso-dtor.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 2a0a842af52..6d22b3d3bfd 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo 
critical.lo \
        oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
        oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
        affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
-       oacc-target.lo target-indirect.lo $(am__objects_1)
+       oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \
+       $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c 
env.c \
        oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
        oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
        affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-       oacc-target.c target-indirect.c $(am__append_3)
+       oacc-target.c target-indirect.c target-cxa-dso-dtor.c \
+       $(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info 
$(libtool_VERSION)
@@ -780,6 +782,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c 
b/libgomp/config/accel/target-cxa-dso-dtor.c
new file mode 100644
index 00000000000..e40a5f0bdfe
--- /dev/null
+++ b/libgomp/config/accel/target-cxa-dso-dtor.c
@@ -0,0 +1,62 @@
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libgomp.h"
+
+extern void __cxa_finalize (void *);
+
+/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.
+
+   Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit'
+   calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host.
+   '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx
+   results in 'ld' error:
+
+       unresolved symbol __dso_handle
+       collect2: error: ld returned 1 exit status
+       nvptx mkoffload: fatal error: 
[...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status
+
+   ..., or for GCN get an implicit definition (running with
+   '--trace-symbol=__dso_handle'):
+
+       ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: 
reference to __dso_handle
+       <internal>: definition of __dso_handle
+
+   ..., which might be fine, but let's just make it explicit.  */
+
+/* There are no DSOs; this is the main program.  */
+attribute_hidden void * const __dso_handle = 0;
+
+/* If this file gets linked in, that means that '__dso_handle' has been
+   referenced (for '__cxa_atexit'), and in that case, we also have to run
+   '__cxa_finalize'.  Make that happen by overriding the weak libgcc dummy
+   function '__GCC_offload___cxa_finalize'.  */
+
+void
+__GCC_offload___cxa_finalize (void *dso_handle)
+{
+  __cxa_finalize (dso_handle);
+}
diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c
new file mode 100644
index 00000000000..d1a898db033
--- /dev/null
+++ b/libgomp/target-cxa-dso-dtor.c
@@ -0,0 +1,3 @@
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */
+
+/* Nothing needed here.  */
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C 
b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
new file mode 100644
index 00000000000..dc53326a773
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
@@ -0,0 +1,104 @@
+/* Offloaded C++ objects construction and destruction.  */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+   { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname 
} */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+  int x;
+  
+  S()
+    : x(-1)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  S(int x)
+    : x(x)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  ~S()
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+};
+
+#pragma omp end declare target
+
+S sH1(7);
+
+#pragma omp declare target
+
+S sHD1(5);
+
+std::vector<S> svHD1(2);
+
+#pragma omp end declare target
+
+S sH2(3);
+
+int main()
+{
+  int c = 0;
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, 
omp_is_initial_device());
+  }
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, 
omp_is_initial_device());
+  }
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+  return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+   For the host, there are four expected calls:
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized 
} }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized } }
+
+   For the device, there are two expected calls:
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 
optimized } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } }
+*/
+
+/* C++ objects are constructed in order of appearance (..., and destructed in 
reverse order).
+   { dg-output {S, 7, 1[\r\n]+} }
+   { dg-output {S, 5, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, 3, 1[\r\n]+} }
+   { dg-output {main:1, 1[\r\n]+} }
+   { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:4, 1[\r\n]+} }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, 3, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, 5, 1[\r\n]+} }
+   { dg-output {~S, 7, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C 
b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
new file mode 100644
index 00000000000..5fa216805e0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
@@ -0,0 +1,138 @@
+/* Offloaded 'constructor' and 'destructor' functions, and C++ objects 
construction and destruction.  */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+   { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname 
} */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+  int x;
+  
+  S()
+    : x(-1)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  S(int x)
+    : x(x)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  ~S()
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+};
+
+#pragma omp end declare target
+
+S sH1 __attribute__((init_priority(1500))) (7);
+
+#pragma omp declare target
+
+S sHD1 __attribute__((init_priority(2000))) (5);
+
+std::vector<S> svHD1 __attribute__((init_priority(1000))) (2);
+
+static void
+__attribute__((constructor(20000)))
+initDH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(20000)))
+finiDH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+S sH2 __attribute__((init_priority(500))) (3);
+
+static void
+__attribute__((constructor(10000)))
+initH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(10000)))
+finiH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+  int c = 0;
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, 
omp_is_initial_device());
+  }
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, 
omp_is_initial_device());
+  }
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+  return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+   For the host, there are four expected calls:
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized 
} }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized } }
+
+   For the device, there are two expected calls:
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 
optimized } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, 
_ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } }
+*/
+
+/* Defined order in which 'constructor' functions, and 'destructor' functions 
are run, and C++ objects are constructed (..., and destructed in reverse order).
+   { dg-output {S, 3, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, 7, 1[\r\n]+} }
+   { dg-output {S, 5, 1[\r\n]+} }
+   { dg-output {initH1, 1[\r\n]+} }
+   { dg-output {initDH1, 1[\r\n]+} }
+   { dg-output {main:1, 1[\r\n]+} }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {initDH1, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:2, 1[\r\n]+} { target  { ! offload_device } } }
+   { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:4, 1[\r\n]+} }
+   { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {finiDH1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, 5, 1[\r\n]+} }
+   { dg-output {~S, 7, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, 3, 1[\r\n]+} }
+   { dg-output {finiDH1, 1[\r\n]+} }
+   { dg-output {finiH1, 1[\r\n]+} }
+*/
-- 
2.34.1

Reply via email to