Author: Fangrui Song Date: 2020-10-05T12:53:59-07:00 New Revision: a2cc8833683dd124cf2ee96f6d17f7f835da1fc8
URL: https://github.com/llvm/llvm-project/commit/a2cc8833683dd124cf2ee96f6d17f7f835da1fc8 DIFF: https://github.com/llvm/llvm-project/commit/a2cc8833683dd124cf2ee96f6d17f7f835da1fc8.diff LOG: [CUDA] Don't call __cudaRegisterVariable on C++17 inline variables D17779: host-side shadow variables of external declarations of device-side global variables have internal linkage and are referenced by `__cuda_register_globals`. nvcc from CUDA 11 does not allow `__device__ inline` or `__device__ constexpr` (C++17 inline variables) but clang has incorrectly supported them for a while: ``` error: A __device__ variable cannot be marked constexpr error: An inline __device__/__constant__/__managed__ variable must have internal linkage when the program is compiled in whole program mode (-rdc=false) ``` If such a variable (which has a comdat group) is discarded (a copy from another translation unit is prevailing and selected), accessing the variable from outside the section group (`__cuda_register_globals`) is a violation of the ELF specification and will be rejected by linkers: > A symbol table entry with STB_LOCAL binding that is defined relative to one > of a group's sections, and that is contained in a symbol table section that > is not part of the group, must be discarded if the group members are > discarded. References to this symbol table entry from outside the group are > not allowed. As a workaround, don't register such inline variables for now. (If we register the variables in all TUs, we will keep multiple instances of the shadow and break the C++ semantics for inline variables). We should reject such variables in Sema but our internal users need some time to migrate. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D88786 Added: Modified: clang/lib/CodeGen/CodeGenModule.cpp clang/test/CodeGenCUDA/device-stub.cu Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index c3457865c0b0..93b49ec981e8 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4129,7 +4129,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, // Shadow variables and their properties must be registered with CUDA // runtime. Skip Extern global variables, which will be registered in // the TU where they are defined. - if (!D->hasExternalStorage()) + // + // Don't register a C++17 inline variable. The local symbol can be + // discarded and referencing a discarded local symbol from outside the + // comdat (__cuda_register_globals) is disallowed by the ELF spec. + // TODO: Reject __device__ constexpr and __device__ inline in Sema. + if (!D->hasExternalStorage() && !D->isInline()) getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), D->hasAttr<CUDAConstantAttr>()); } else if (D->hasAttr<CUDASharedAttr>()) { diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index ca21116fc989..16bbef6cfad5 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -29,6 +29,10 @@ // RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \ // RUN: | FileCheck %s -allow-deprecated-dag-overlap \ // RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \ +// RUN: -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \ +// RUN: | FileCheck %s -allow-deprecated-dag-overlap \ +// RUN: --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW,LNX_17 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -target-sdk-version=9.2 -o - \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN @@ -91,9 +95,18 @@ __device__ int ext_device_var_def = 1; // WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef __constant__ int ext_constant_var_def = 2; +#if __cplusplus > 201402L +/// FIXME: Reject __device__ constexpr and inline variables in Sema. +// LNX_17: @inline_var = internal global i32 undef, comdat, align 4{{$}} +// LNX_17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}} +__device__ inline int inline_var = 3; +struct C { + __device__ static constexpr int member_inline_var = 4; +}; +#endif void use_pointers() { - int *p; + const int *p; p = &device_var; p = &constant_var; p = &shared_var; @@ -101,6 +114,10 @@ void use_pointers() { p = &ext_device_var; p = &ext_constant_var; p = &ext_host_var; +#if __cplusplus > 201402L + p = &inline_var; + p = &C::member_inline_var; +#endif } // Make sure that all parts of GPU code init/cleanup are there: @@ -185,6 +202,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0 +// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var // ALL: ret void // Test that we've built a constructor. _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits