'__dso_handle' for '__cxa_atexit', '__cxa_finalize'. See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.
PR target/119853 PR target/119854 libgcc/ * config/gcn/crt0.c (_fini_array): Call '__GCC_offload___cxa_finalize'. * config/nvptx/gbl-ctors.c (__static_do_global_dtors): Likewise. libgomp/ * target-cxa-dso-dtor.c: New. * config/accel/target-cxa-dso-dtor.c: Likewise. * Makefile.am (libgomp_la_SOURCES): Add it. * Makefile.in: Regenerate. * testsuite/libgomp.c++/target-cdtor-1.C: New. * testsuite/libgomp.c++/target-cdtor-2.C: Likewise. --- libgcc/config/gcn/crt0.c | 32 ++++ libgcc/config/nvptx/gbl-ctors.c | 16 ++ libgomp/Makefile.am | 2 +- libgomp/Makefile.in | 7 +- libgomp/config/accel/target-cxa-dso-dtor.c | 62 ++++++++ libgomp/target-cxa-dso-dtor.c | 3 + .../testsuite/libgomp.c++/target-cdtor-1.C | 104 +++++++++++++ .../testsuite/libgomp.c++/target-cdtor-2.C | 138 ++++++++++++++++++ 8 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 libgomp/config/accel/target-cxa-dso-dtor.c create mode 100644 libgomp/target-cxa-dso-dtor.c create mode 100644 libgomp/testsuite/libgomp.c++/target-cdtor-1.C create mode 100644 libgomp/testsuite/libgomp.c++/target-cdtor-2.C diff --git a/libgcc/config/gcn/crt0.c b/libgcc/config/gcn/crt0.c index dbd6749a47f..cc23e214cf9 100644 --- a/libgcc/config/gcn/crt0.c +++ b/libgcc/config/gcn/crt0.c @@ -24,6 +24,28 @@ typedef long long size_t; /* Provide an entry point symbol to silence a linker warning. */ void _start() {} + +#define PR119369_fixed 0 + + +/* Host/device compatibility: '__cxa_finalize'. Dummy; if necessary, + overridden via libgomp 'target-cxa-dso-dtor.c'. */ + +#if PR119369_fixed +extern void __GCC_offload___cxa_finalize (void *) __attribute__((weak)); +#else +void __GCC_offload___cxa_finalize (void *) __attribute__((weak)); + +void __attribute__((weak)) +__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused))) +{ +} +#endif + +/* There are no DSOs; this is the main program. */ +static void * const __dso_handle = 0; + + #ifdef USE_NEWLIB_INITFINI extern void __libc_init_array (void) __attribute__((weak)); @@ -38,6 +60,11 @@ void _init_array() __attribute__((amdgpu_hsa_kernel ())) void _fini_array() { +#if PR119369_fixed + if (__GCC_offload___cxa_finalize) +#endif + __GCC_offload___cxa_finalize (__dso_handle); + __libc_fini_array (); } @@ -70,6 +97,11 @@ void _init_array() __attribute__((amdgpu_hsa_kernel ())) void _fini_array() { +#if PR119369_fixed + if (__GCC_offload___cxa_finalize) +#endif + __GCC_offload___cxa_finalize (__dso_handle); + size_t count; size_t i; diff --git a/libgcc/config/nvptx/gbl-ctors.c b/libgcc/config/nvptx/gbl-ctors.c index 26268116ee0..10954ee3ab6 100644 --- a/libgcc/config/nvptx/gbl-ctors.c +++ b/libgcc/config/nvptx/gbl-ctors.c @@ -31,6 +31,20 @@ extern int atexit (void (*function) (void)); +/* Host/device compatibility: '__cxa_finalize'. Dummy; if necessary, + overridden via libgomp 'target-cxa-dso-dtor.c'. */ + +extern void __GCC_offload___cxa_finalize (void *); + +void __attribute__((weak)) +__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused))) +{ +} + +/* There are no DSOs; this is the main program. */ +static void * const __dso_handle = 0; + + /* Handler functions ('static', in contrast to the 'gbl-ctors.h' prototypes). */ @@ -49,6 +63,8 @@ static void __static_do_global_dtors (void); static void __static_do_global_dtors (void) { + __GCC_offload___cxa_finalize (__dso_handle); + func_ptr *p = __DTOR_LIST__; ++p; for (; *p; ++p) diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index e3202aeb0e0..19479aea462 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c + oacc-target.c target-indirect.c target-cxa-dso-dtor.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 2a0a842af52..6d22b3d3bfd 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo target-indirect.lo $(am__objects_1) + oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c $(am__append_3) + oacc-target.c target-indirect.c target-cxa-dso-dtor.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -780,6 +782,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c new file mode 100644 index 00000000000..e40a5f0bdfe --- /dev/null +++ b/libgomp/config/accel/target-cxa-dso-dtor.c @@ -0,0 +1,62 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API + + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp.h" + +extern void __cxa_finalize (void *); + +/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>. + + Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit' + calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host. + '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx + results in 'ld' error: + + unresolved symbol __dso_handle + collect2: error: ld returned 1 exit status + nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status + + ..., or for GCN get an implicit definition (running with + '--trace-symbol=__dso_handle'): + + ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle + <internal>: definition of __dso_handle + + ..., which might be fine, but let's just make it explicit. */ + +/* There are no DSOs; this is the main program. */ +attribute_hidden void * const __dso_handle = 0; + +/* If this file gets linked in, that means that '__dso_handle' has been + referenced (for '__cxa_atexit'), and in that case, we also have to run + '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy + function '__GCC_offload___cxa_finalize'. */ + +void +__GCC_offload___cxa_finalize (void *dso_handle) +{ + __cxa_finalize (dso_handle); +} diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c new file mode 100644 index 00000000000..d1a898db033 --- /dev/null +++ b/libgomp/target-cxa-dso-dtor.c @@ -0,0 +1,3 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */ + +/* Nothing needed here. */ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C new file mode 100644 index 00000000000..dc53326a773 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C @@ -0,0 +1,104 @@ +/* Offloaded C++ objects construction and destruction. */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1(7); + +#pragma omp declare target + +S sHD1(5); + +std::vector<S> svHD1(2); + +#pragma omp end declare target + +S sH2(3); + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } } +*/ + +/* C++ objects are constructed in order of appearance (..., and destructed in reverse order). + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C new file mode 100644 index 00000000000..5fa216805e0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C @@ -0,0 +1,138 @@ +/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1 __attribute__((init_priority(1500))) (7); + +#pragma omp declare target + +S sHD1 __attribute__((init_priority(2000))) (5); + +std::vector<S> svHD1 __attribute__((init_priority(1000))) (2); + +static void +__attribute__((constructor(20000))) +initDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(20000))) +finiDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +S sH2 __attribute__((init_priority(500))) (3); + +static void +__attribute__((constructor(10000))) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(10000))) +finiH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized } } +*/ + +/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order). + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {initH1, 1[\r\n]+} } + { dg-output {initDH1, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {initDH1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {finiDH1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {finiDH1, 1[\r\n]+} } + { dg-output {finiH1, 1[\r\n]+} } +*/ -- 2.34.1