Author: Fangrui Song
Date: 2020-10-05T12:53:59-07:00
New Revision: a2cc8833683dd124cf2ee96f6d17f7f835da1fc8

URL: 
https://github.com/llvm/llvm-project/commit/a2cc8833683dd124cf2ee96f6d17f7f835da1fc8
DIFF: 
https://github.com/llvm/llvm-project/commit/a2cc8833683dd124cf2ee96f6d17f7f835da1fc8.diff

LOG: [CUDA] Don't call __cudaRegisterVariable on C++17 inline variables

D17779: host-side shadow variables of external declarations of device-side
global variables have internal linkage and are referenced by
`__cuda_register_globals`.

nvcc from CUDA 11 does not allow `__device__ inline` or `__device__ constexpr`
(C++17 inline variables) but clang has incorrectly supported them for a while:

```
error: A __device__ variable cannot be marked constexpr
error: An inline __device__/__constant__/__managed__ variable must have 
internal linkage when the program is compiled in whole program mode (-rdc=false)
```

If such a variable (which has a comdat group) is discarded (a copy from another
translation unit is prevailing and selected), accessing the variable from
outside the section group (`__cuda_register_globals`) is a violation of the ELF
specification and will be rejected by linkers:

> A symbol table entry with STB_LOCAL binding that is defined relative to one 
> of a group's sections, and that is contained in a symbol table section that 
> is not part of the group, must be discarded if the group members are 
> discarded. References to this symbol table entry from outside the group are 
> not allowed.

As a workaround, don't register such inline variables for now.
(If we register the variables in all TUs, we will keep multiple instances of 
the shadow and break the C++ semantics for inline variables).
We should reject such variables in Sema but our internal users need some time 
to migrate.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D88786

Added: 
    

Modified: 
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/test/CodeGenCUDA/device-stub.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index c3457865c0b0..93b49ec981e8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4129,7 +4129,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const 
VarDecl *D,
         // Shadow variables and their properties must be registered with CUDA
         // runtime. Skip Extern global variables, which will be registered in
         // the TU where they are defined.
-        if (!D->hasExternalStorage())
+        //
+        // Don't register a C++17 inline variable. The local symbol can be
+        // discarded and referencing a discarded local symbol from outside the
+        // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+        // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+        if (!D->hasExternalStorage() && !D->isInline())
           getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
                                              D->hasAttr<CUDAConstantAttr>());
       } else if (D->hasAttr<CUDASharedAttr>()) {

diff  --git a/clang/test/CodeGenCUDA/device-stub.cu 
b/clang/test/CodeGenCUDA/device-stub.cu
index ca21116fc989..16bbef6cfad5 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -29,6 +29,10 @@
 // RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - 
\
 // RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
 // RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
+// RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - 
\
+// RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW,LNX_17
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=9.2 -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
@@ -91,9 +95,18 @@ __device__ int ext_device_var_def = 1;
 // WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
 __constant__ int ext_constant_var_def = 2;
 
+#if __cplusplus > 201402L
+/// FIXME: Reject __device__ constexpr and inline variables in Sema.
+// LNX_17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
+// LNX_17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, 
align 4{{$}}
+__device__ inline int inline_var = 3;
+struct C {
+  __device__ static constexpr int member_inline_var = 4;
+};
+#endif
 
 void use_pointers() {
-  int *p;
+  const int *p;
   p = &device_var;
   p = &constant_var;
   p = &shared_var;
@@ -101,6 +114,10 @@ void use_pointers() {
   p = &ext_device_var;
   p = &ext_constant_var;
   p = &ext_host_var;
+#if __cplusplus > 201402L
+  p = &inline_var;
+  p = &C::member_inline_var;
+#endif
 }
 
 // Make sure that all parts of GPU code init/cleanup are there:
@@ -185,6 +202,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, 
{{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, 
i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, 
{{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 
0, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, 
{{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, 
i32 1, i32 0
+// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var
 // ALL: ret void
 
 // Test that we've built a constructor.


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to