yaxunl updated this revision to Diff 424280.
yaxunl marked an inline comment as done.
yaxunl added a comment.

use static function


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124189/new/

https://reviews.llvm.org/D124189

Files:
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/device-var-linkage.cu
  clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
  clang/test/CodeGenCUDA/managed-var.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu

Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -40,6 +40,11 @@
 // RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.host
 // RUN: cat %t.host | FileCheck -check-prefix=HOST-NEG %s
 
+// Check postfix for CUDA.
+
+// RUN: %clang_cc1 -no-opaque-pointers -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - %s | FileCheck \
+// RUN:   -check-prefixes=CUDA %s
 
 #include "Inputs/cuda.h"
 
@@ -55,11 +60,12 @@
 // INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
 
 // Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH:.*]]\00"
+// EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH:.*]]\00"
+// CUDA-DAG: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 
-// POSTFIX: @_ZL1x__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
-// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x__static__[[HASH]]\00"
+// POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00"
 
 static __device__ int x;
 
@@ -73,8 +79,8 @@
 // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
 
 // Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1y__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
-// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y__static__[[HASH]]\00"
+// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
 
 static __constant__ int y;
 
Index: clang/test/CodeGenCUDA/managed-var.cu
===================================================================
--- clang/test/CodeGenCUDA/managed-var.cu
+++ clang/test/CodeGenCUDA/managed-var.cu
@@ -52,15 +52,15 @@
 
 // NORDC-D-DAG: @_ZL2sx.managed = addrspace(1) externally_initialized global i32 1, align 4
 // NORDC-D-DAG: @_ZL2sx = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-D-DAG: @_ZL2sx__static__[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
-// RDC-D-DAG: @_ZL2sx__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-D-DAG: @_ZL2sx.static.[[HASH:.*]].managed = addrspace(1) externally_initialized global i32 1, align 4
+// RDC-D-DAG: @_ZL2sx.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
 // HOST-DAG: @_ZL2sx.managed = internal global i32 1
 // HOST-DAG: @_ZL2sx = internal externally_initialized global i32* null
 // NORDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx\00"
-// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH:.*]]\00"
+// RDC-DAG: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH:.*]]\00"
 
-// POSTFIX:  @_ZL2sx__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx__static__[[HASH]]\00"
+// POSTFIX:  @_ZL2sx.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// POSTFIX: @[[DEVNAMESX:[0-9]+]] = {{.*}}c"_ZL2sx.static.[[HASH]]\00"
 static __managed__ int sx = 1;
 
 // DEV-DAG: @llvm.compiler.used
Index: clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
===================================================================
--- clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
+++ clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
@@ -8,17 +8,38 @@
 
 // RUN: cat %t.dev %t.host | FileCheck %s
 
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
+// RUN:   -emit-llvm -o - %s | FileCheck -check-prefix=CUDA %s
+
 #include "Inputs/cuda.h"
 
-// CHECK: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.anon\.b04fd23c98500190]](
-// CHECK: @[[STR:.*]] = {{.*}} c"[[KERN]]\00"
-// CHECK: call i32 @__hipRegisterFunction({{.*}}@[[STR]]
+// CHECK-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
+// CHECK-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
+// CHECK-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
+// CHECK-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
+// CHECK-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
+// CHECK-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"
+// CHECK-DAG: call i32 @__hipRegisterFunction({{.*}}@[[STR1]]
+// CHECK-DAG: call i32 @__hipRegisterFunction({{.*}}@[[STR2]]
+// CHECK-DAG: call i32 @__hipRegisterFunction({{.*}}@[[STR3]]
+
+// CUDA: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
+
+template <typename T>
+__global__ void tempKern(T x) {}
 
 namespace {
-__global__ void kernel() {
-}
+  __global__ void kernel() {}
+  struct X {};
+  X x;
+  auto lambda = [](){};
 }
 
 void test() {
   kernel<<<1, 1>>>();
+
+  tempKern<<<1, 1>>>(x);
+
+  tempKern<<<1, 1>>>(lambda);
 }
Index: clang/test/CodeGenCUDA/device-var-linkage.cu
===================================================================
--- clang/test/CodeGenCUDA/device-var-linkage.cu
+++ clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -37,15 +37,15 @@
 extern __managed__ int ev3;
 
 // NORDC-DAG: @_ZL3sv1 = addrspace(1) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
+// RDC-DAG: @_ZL3sv1.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 // HOST-DAG: @_ZL3sv1 = internal global i32 undef
 static __device__ int sv1;
 // NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
+// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
 // HOST-DAG: @_ZL3sv2 = internal global i32 undef
 static __constant__ int sv2;
 // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global i32 addrspace(1)* null
-// RDC-DAG: @_ZL3sv3__static__[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
+// RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global i32 addrspace(1)* null
 // HOST-DAG: @_ZL3sv3 = internal externally_initialized global i32* null
 static __managed__ int sv3;
 
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -6779,8 +6779,14 @@
 
 void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
                                                     const Decl *D) const {
-  OS << (isa<VarDecl>(D) ? "__static__" : ".anon.")
-     << getContext().getCUIDHash();
+  StringRef Tag;
+  // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
+  // postfix beginning with '.' since the symbol name can be demangled.
+  if (LangOpts.HIP)
+    Tag = (isa<VarDecl>(D) ? ".static." : ".intern.");
+  else
+    Tag = (isa<VarDecl>(D) ? "__static__" : "__intern__");
+  OS << Tag << getContext().getCUIDHash();
 }
 
 namespace {
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -11307,8 +11307,10 @@
   return GVA_DiscardableODR;
 }
 
-static GVALinkage adjustGVALinkageForAttributes(const ASTContext &Context,
-                                                const Decl *D, GVALinkage L) {
+static GVALinkage
+adjustGVALinkageForAttributes(const ASTContext &Context, const Decl *D,
+                              GVALinkage L,
+                              bool ConsiderCudaGlobalAttr = true) {
   // See http://msdn.microsoft.com/en-us/library/xa0d9ste.aspx
   // dllexport/dllimport on inline functions.
   if (D->hasAttr<DLLImportAttr>()) {
@@ -11317,7 +11319,8 @@
   } else if (D->hasAttr<DLLExportAttr>()) {
     if (L == GVA_DiscardableODR)
       return GVA_StrongODR;
-  } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
+  } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice &&
+             ConsiderCudaGlobalAttr) {
     // Device-side functions with __global__ attribute must always be
     // visible externally so they can be launched from host.
     if (D->hasAttr<CUDAGlobalAttr>() &&
@@ -11366,6 +11369,16 @@
              basicGVALinkageForFunction(*this, FD)));
 }
 
+static GVALinkage
+GetGVALinkageForCUDAKernelWithoutGlobalAttr(const ASTContext &Context,
+                                            const FunctionDecl *FD) {
+  return adjustGVALinkageForExternalDefinitionKind(
+      Context, FD,
+      adjustGVALinkageForAttributes(Context, FD,
+                                    basicGVALinkageForFunction(Context, FD),
+                                    /*ConsiderCudaGlobalAttr=*/false));
+}
+
 static GVALinkage basicGVALinkageForVariable(const ASTContext &Context,
                                              const VarDecl *VD) {
   if (!VD->isExternallyVisible())
@@ -12290,7 +12303,9 @@
   // anonymous name space needs to be externalized to avoid duplicate symbols.
   return (IsStaticVar &&
           (D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
-         (D->hasAttr<CUDAGlobalAttr>() && D->isInAnonymousNamespace());
+         (D->hasAttr<CUDAGlobalAttr>() &&
+          GetGVALinkageForCUDAKernelWithoutGlobalAttr(
+              *this, cast<FunctionDecl>(D)) == GVA_Internal);
 }
 
 bool ASTContext::shouldExternalize(const Decl *D) const {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to