yaxunl updated this revision to Diff 320644.
yaxunl marked 6 inline comments as done.
yaxunl added a comment.

Revised by Artem's comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95560/new/

https://reviews.llvm.org/D95560

Files:
  clang/lib/AST/ASTContext.cpp
  clang/lib/CodeGen/CGDecl.cpp
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/AST/ast-dump-func-scope-static-var.cu
  clang/test/CodeGenCUDA/func-scope-static-var.cu
  clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
  clang/test/SemaCUDA/func-scope-static-var.cu

Index: clang/test/SemaCUDA/func-scope-static-var.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/func-scope-static-var.cu
@@ -0,0 +1,115 @@
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify=host,com -x hip %s
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fcuda-is-device -verify=dev,com -x hip %s
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fgpu-rdc -verify=host,com -x hip %s
+// RUN: %clang_cc1 -std=c++14 -fsyntax-only -fgpu-rdc -fcuda-is-device -verify=dev,com -x hip %s
+
+#include "Inputs/cuda.h"
+
+struct A {
+  static int a;
+  static __device__ int fun(); 
+};
+
+int A::a;
+__device__ int A::fun() {
+  return a;
+  // dev-error@-1 {{reference to __host__ variable 'a' in __device__ function}}
+}
+
+// Assuming this function accepts a pointer to a device variable and calculate some result.
+__device__ __host__ int work(const int *x);
+
+int fun1(int x) {
+  static __device__ int a = sizeof(a);
+  static __device__ int b = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const __device__ int c = sizeof(a);
+  static constexpr __device__ int d = sizeof(a);
+  static __constant__ __device__ int e = sizeof(a);
+  static __managed__ __device__ int f = sizeof(a);
+  static int a2 = sizeof(a);
+  static int b2 = x;
+  static const int c2 = sizeof(a);
+  static constexpr int d2 = sizeof(a);
+  static __constant__ int e2 = sizeof(a);
+  static __managed__ int f2 = sizeof(a);
+  return work(&a) + work(&b) + work(&c) + work(&d) + work(&e) + f + a2 + b2 + c2 + d2 + work(&e2) + f2;
+}
+
+__device__ int fun2(int x) {
+  static __device__ int a = sizeof(a);
+  static __device__ int b = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const __device__ int c = sizeof(a);
+  static constexpr __device__ int d = sizeof(a);
+  static __constant__ __device__ int e = sizeof(a);
+  static __managed__ __device__ int f = sizeof(a);
+  static int a2 = sizeof(a);
+  static int b2 = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const int c2 = sizeof(a);
+  static constexpr int d2 = sizeof(a);
+  static __constant__ int e2 = sizeof(a);
+  static __managed__ int f2 = sizeof(a);
+  return a + b + c + d + e + f + a2 + b2 + c2 + d2 + e2 + f2;
+}
+
+__device__ __host__ int fun3(int x) {
+  static __device__ int a = sizeof(a);
+  static __device__ int b = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const __device__ int c = sizeof(a);
+  static constexpr __device__ int d = sizeof(a);
+  static __constant__ __device__ int e = sizeof(a);
+  static __managed__ __device__ int f = sizeof(a);
+  static int a2 = sizeof(a);
+  static int b2 = x;
+  // dev-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const int c2 = sizeof(a);
+  static constexpr int d2 = sizeof(a);
+  static __constant__ int e2 = sizeof(a);
+  static __managed__ int f2 = sizeof(a);
+  return work(&a) + work(&b) + work(&c) + work(&d) + work(&e) + f + a2 + b2 + c2 + d2 + work(&e2) + f2;
+}
+
+template<typename T>
+__device__ __host__ int fun4(T x) {
+  static __device__ int a = sizeof(x);
+  static __device__ int b = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const __device__ int c = sizeof(x);
+  static constexpr __device__ int d = sizeof(x);
+  static __constant__ __device__ int e = sizeof(a);
+  static __managed__ __device__ int f = sizeof(a);
+  static int a2 = sizeof(x);
+  static int b2 = x;
+  // dev-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const int c2 = sizeof(x);
+  static constexpr int d2 = sizeof(x);
+  static __constant__ int e2 = sizeof(a);
+  static __managed__ int f2 = sizeof(a);
+  return work(&a) + work(&b) + work(&c) + work(&d) + work(&e) + f + a2 + b2 + c2 + d2 + work(&e2) + f2;
+}
+
+__device__ __host__ int fun4_caller() {
+  return fun4(1);
+  // com-note@-1 {{in instantiation of function template specialization 'fun4<int>' requested here}}
+}
+
+__global__ void fun5(int x, int *y) {
+  static __device__ int a = sizeof(a);
+  static __device__ int b = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const __device__ int c = sizeof(a);
+  static constexpr __device__ int d = sizeof(a);
+  static __constant__ __device__ int e = sizeof(a);
+  static __managed__ __device__ int f = sizeof(a);
+  static int a2 = sizeof(a);
+  static int b2 = x;
+  // com-error@-1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+  static const int c2 = sizeof(a);
+  static constexpr int d2 = sizeof(a);
+  static __constant__ int e2 = sizeof(a);
+  static __managed__ int f2 = sizeof(a);
+  *y = a + b + c + d + e + f + a2 + b2 + c2 + d2 + e2 + f2;
+}
Index: clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
===================================================================
--- clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -14,7 +14,7 @@
 // Test function scope static device variable, which should not be externalized.
 // DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
 // DEV-DAG: @_ZZ6kernelPiPPKiE21local_static_constant = internal addrspace(4) constant i32 42
-// DEV-DAG: @_ZZ6kernelPiPPKiE19local_static_device = internal addrspace(1) constant i32 43
+// DEV-DAG: @_ZZ6kernelPiPPKiE19local_static_device = internal addrspace(4) constant i32 43
 
 // Check a static device variable referenced by host function is externalized.
 // DEV-DAG: @_ZL1x ={{.*}} addrspace(1) externally_initialized global i32 0
Index: clang/test/CodeGenCUDA/func-scope-static-var.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/func-scope-static-var.cu
@@ -0,0 +1,168 @@
+// REQUIRES: x86-registered-target, amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV,NORDC %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -std=c++11 \
+// RUN:   -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=DEV,RDC %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN:   -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -std=c++11 \
+// RUN:   -emit-llvm -fgpu-rdc -o - -x hip %s | FileCheck \
+// RUN:   -check-prefixes=HOST %s
+
+#include "Inputs/cuda.h"
+
+// In device functions, static device variables are not externalized nor shadowed.
+// Static managed variable behaves like a normal static device variable.
+
+// DEV: @_ZZ4fun1vE1a = internal addrspace(1) global i32 1
+// HOST-NOT: @_ZZ4fun1vE1a
+// DEV: @_ZZ4fun1vE1b = internal addrspace(1) global i32 2
+// HOST-NOT: @_ZZ4fun1vE1b
+// DEV: @_ZZ4fun1vE1c = internal addrspace(4) constant i32 3
+// HOST-NOT: @_ZZ4fun1vE1c
+// DEV: @_ZZ4fun1vE1d = internal addrspace(4) constant i32 4
+// HOST-NOT: @_ZZ4fun1vE1d
+// DEV: @_ZZ4fun1vE1e = internal addrspace(4) global i32 5
+// HOST-NOT: @_ZZ4fun1vE1e
+// DEV: @_ZZ4fun1vE1f = internal addrspace(1) global i32 6
+// HOST-NOT: @_ZZ4fun1vE1f
+__device__ int fun1() {
+  static int a = 1;
+  static __device__ int b = 2;
+  static const int c = 3;
+  static constexpr int d = 4;
+  static __constant__ int e = 5;
+  static __managed__ int f = 6;
+  return a + b + c + d + e + f;
+}
+
+// Assuming this function accepts a device pointer and does some work. 
+__host__ __device__ int work(int *x);
+
+// In host function, static device variables are externalized if used and shadowed.
+
+// DEV-NOT: @_ZZ4fun2vE1a
+// HOST: @_ZZ4fun2vE1a = internal global i32 1
+// NORDC: @_ZZ4fun2vE1b = dso_local addrspace(1) global i32 2
+// RDC: @_ZZ4fun2vE1b = internal addrspace(1) global i32 2
+// HOST: @_ZZ4fun2vE1b = internal global i32 2
+// DEV-NOT: @_ZZ4fun2vE1c
+// HOST: @_ZZ4fun2vE1c = internal constant i32 3
+// DEV-NOT: @_ZZ4fun2vE1d
+// HOST: @_ZZ4fun2vE1d = internal constant i32 4
+// NORDC: @_ZZ4fun2vE1e = dso_local addrspace(4) global i32 5
+// RDC: @_ZZ4fun2vE1e = internal addrspace(4) global i32 5
+// HOST: @_ZZ4fun2vE1e = internal global i32 5
+// DEV: @_ZZ4fun2vE1f = internal addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @_ZZ4fun2vE1b to i32*)
+// HOST: @_ZZ4fun2vE1f = internal global i32* @_ZZ4fun2vE1b
+// NORDC: @_ZZ4fun2vE1b_0 = dso_local addrspace(1) global i32 6
+// RDC: @_ZZ4fun2vE1b_0 = internal addrspace(1) global i32 6
+// HOST: @_ZZ4fun2vE1b_0 = internal global i32 6
+// NORDC: @_ZZ4fun2vE1g = dso_local addrspace(1) externally_initialized global i32 undef
+// RDC: @_ZZ4fun2vE1g = external dso_local addrspace(1) global i32
+// HOST: @_ZZ4fun2vE1g = internal global i32 7
+int fun2() {
+  static int a = 1;
+  static __device__ int b = 2;
+  static const int c = 3;
+  static constexpr int d = 4;
+  static __constant__ int e = 5;
+  static __device__ int *f = &b;
+  for (int i = 0; i < 10; i++) {
+    static __device__ int b = 6;
+    work(&b);
+  }
+  static __managed__ int g = 7;
+  return a + c + d + work(&e) + g;
+}
+
+// In host device function, explicit static device variables are externalized
+// if used and registered. Static variables w/o attributes are implicit device
+// variables in device compilation and host variables in host compilation.
+// The variable emitted in host compilation is not the shadow variable of the
+// variable emitted in device compilation.
+
+// DEV: @_ZZ4fun3vE1a = internal addrspace(1) global i32 1
+// HOST: @_ZZ4fun3vE1a = internal global i32 1
+// NORDC: @_ZZ4fun3vE1b = dso_local addrspace(1) global i32 2
+// RDC: @_ZZ4fun3vE1b = internal addrspace(1) global i32 2
+// HOST: @_ZZ4fun3vE1b = internal global i32 2
+// DEV: @_ZZ4fun3vE1c = internal addrspace(4) constant i32 3
+// HOST: @_ZZ4fun3vE1c = internal constant i32 3
+// DEV: @_ZZ4fun3vE1d = internal addrspace(4) constant i32 4
+// HOST: @_ZZ4fun3vE1d = internal constant i32 4
+// NORDC: @_ZZ4fun3vE1e = dso_local addrspace(4) global i32 5
+// RDC: @_ZZ4fun3vE1e = internal addrspace(4) global i32 5
+// HOST: @_ZZ4fun3vE1e = internal global i32 5
+// DEV: @_ZZ4fun3vE1f = internal addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @_ZZ4fun3vE1b to i32*)
+// HOST: @_ZZ4fun3vE1f = internal global i32* @_ZZ4fun3vE1b
+// NORDC: @_ZZ4fun3vE1b_0 = dso_local addrspace(1) global i32 6
+// RDC: @_ZZ4fun3vE1b_0 = internal addrspace(1) global i32 6
+// HOST: @_ZZ4fun3vE1b_0 = internal global i32 6
+// NORDC: @_ZZ4fun3vE1g = dso_local addrspace(1) externally_initialized global i32 undef
+// RDC: @_ZZ4fun3vE1g = external dso_local addrspace(1) global i32
+// HOST: @_ZZ4fun3vE1g = internal global i32 7
+__host__ __device__ int fun3() {
+  static int a = 1;
+  static __device__ int b = 2;
+  static const int c = 3;
+  static constexpr int d = 4;
+  static __constant__ int e = 5;
+  static __device__ int *f = &b;
+  for (int i = 0; i < 10; i++) {
+    static __device__ int b = 6;
+    work(&b);
+  }
+  static __managed__ int g = 7;
+  return a + c + d + work(&e) + g;
+}
+
+// In kernels, static device variables are not externalized nor shadowed
+// since they cannot be accessed by host code. Static managed variable behaves
+// like a normal static device variable.
+
+// DEV: @_ZZ4fun4vE1a = internal addrspace(1) global i32 1
+// HOST-NOT: @_ZZ4fun4vE1a
+// DEV: @_ZZ4fun4vE1b = internal addrspace(1) global i32 2
+// HOST-NOT: @_ZZ4fun4vE1b
+// DEV: @_ZZ4fun4vE1c = internal addrspace(4) constant i32 3
+// HOST-NOT: @_ZZ4fun4vE1c
+// DEV: @_ZZ4fun4vE1d = internal addrspace(4) constant i32 4
+// HOST-NOT: @_ZZ4fun4vE1d
+// DEV: @_ZZ4fun4vE1e = internal addrspace(4) global i32 5
+// HOST-NOT: @_ZZ4fun4vE1e
+// DEV: @_ZZ4fun4vE1f = internal addrspace(1) global i32 6
+// HOST-NOT: @_ZZ4fun4vE1f
+__global__ void fun4() {
+  static int a = 1;
+  static __device__ int b = 2;
+  static const int c = 3;
+  static constexpr int d = 4;
+  static __constant__ int e = 5;
+  static __managed__ int f = 6;
+}
+
+// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun1vE1f
+// HOST-NOT: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun1vE1f
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1b
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1e
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1f
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun2vE1b_0
+// HOST: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun2vE1g
+// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1a
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1b
+// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1c
+// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1d
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1e
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1f
+// HOST: call void @__hipRegisterVar({{.*}}@_ZZ4fun3vE1b_0
+// HOST: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun3vE1g
+// HOST-NOT: call void @__hipRegisterVar({{.*}}@_ZZ4fun4vE1f
+// HOST-NOT: call void @__hipRegisterManagedVar({{.*}}@_ZZ4fun4vE1f
Index: clang/test/AST/ast-dump-func-scope-static-var.cu
===================================================================
--- /dev/null
+++ clang/test/AST/ast-dump-func-scope-static-var.cu
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -std=c++11 -ast-dump -x hip %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -ast-dump -fcuda-is-device -x hip %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: FunctionDecl {{.*}} fun1
+// CHECK: VarDecl {{.*}} a 'int' static
+// CHECK-NOT: CUDADeviceAttr
+// CHECK: VarDecl {{.*}} b 'int' static
+// CHECK-NEXT: CUDADeviceAttr {{.*}}cuda.h
+// CHECK: VarDecl {{.*}} c 'const int' static cinit
+// CHECK-NOT: CUDADeviceAttr
+// CHECK: VarDecl {{.*}} d 'const int' static constexpr cinit
+// CHECK-NOT: CUDADeviceAttr
+// CHECK: VarDecl {{.*}} e 'int' static cinit
+// CHECK: CUDAConstantAttr {{.*}}cuda.h
+// CHECK: VarDecl {{.*}} f 'int' static cinit
+// CHECK: HIPManagedAttr {{.*}}cuda.h
+// CHECK: CUDADeviceAttr {{.*}}Implicit
+// CHECK-NOT: CUDADeviceAttr
+void fun1() {
+  static int a;
+  static __device__ int b;
+  static const int c = 1;
+  static constexpr int d = 1;
+  static __constant__ int e = 1;
+  static __managed__ int f = 1;
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} fun2
+// CHECK: VarDecl {{.*}} a 'int' static
+// CHECK-NEXT: CUDADeviceAttr {{.*}}Implicit
+// CHECK: VarDecl {{.*}} b 'int' static
+// CHECK-NEXT: CUDADeviceAttr {{.*}}cuda.h
+// CHECK: VarDecl {{.*}} c 'const int' static cinit
+// CHECK: CUDAConstantAttr {{.*}}Implicit
+// CHECK-NOT: CUDADeviceAttr
+// CHECK: VarDecl {{.*}} d 'const int' static constexpr cinit
+// CHECK: CUDAConstantAttr {{.*}}Implicit
+// CHECK-NOT: CUDADeviceAttr
+// CHECK: VarDecl {{.*}} e 'int' static cinit
+// CHECK: CUDAConstantAttr {{.*}}cuda.h
+// CHECK: VarDecl {{.*}} f 'int' static cinit
+// CHECK: HIPManagedAttr {{.*}}cuda.h
+// CHECK: CUDADeviceAttr {{.*}}Implicit
+__device__ void fun2() {
+  static int a;
+  static __device__ int b;
+  static const int c = 1;
+  static constexpr int d = 1;
+  static __constant__ int e = 1;
+  static __managed__ int f = 1;
+}
Index: clang/lib/Sema/SemaDecl.cpp
===================================================================
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -7244,6 +7244,25 @@
   // Handle attributes prior to checking for duplicates in MergeVarDecl
   ProcessDeclAttributes(S, NewVD, D);
 
+  // CUDA/HIP: Function-scope static variables in device or global functions
+  // have implicit device or constant attribute. Function-scope static variables
+  // in host device functions have implicit device or constant attribute in
+  // device compilation only.
+  if (getLangOpts().CUDA && SC == SC_Static) {
+    FunctionDecl *CurFD = getCurFunctionDecl();
+    if (CurFD &&
+        (CurFD->hasAttr<CUDADeviceAttr>() ||
+         CurFD->hasAttr<CUDAGlobalAttr>()) &&
+        (getLangOpts().CUDAIsDevice || !CurFD->hasAttr<CUDAHostAttr>()) &&
+        !NewVD->hasAttr<CUDASharedAttr>() &&
+        !NewVD->hasAttr<CUDAConstantAttr>()) {
+      if (NewVD->isConstexpr() || NewVD->getType().getQualifiers().hasConst())
+        NewVD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext()));
+      else if (!NewVD->hasAttr<CUDADeviceAttr>())
+        NewVD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext()));
+    }
+  }
+
   if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice ||
       getLangOpts().SYCLIsDevice) {
     if (EmitTLSUnsupportedError &&
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -94,6 +94,42 @@
   llvm_unreachable("invalid C++ ABI kind");
 }
 
+// Helper class for emitting device-side static variables created in host-side
+// functions for CUDA/HIP. While we do not emit host-side functions on device,
+// we still need to emit the static variables the host code will expect to see
+// on the device.
+class CUDAStaticDeviceVarEmitter
+    : public StmtVisitor<CUDAStaticDeviceVarEmitter> {
+public:
+  CodeGenFunction CGF;
+  CUDAStaticDeviceVarEmitter(CodeGenModule &CGM) : CGF(CGM) {}
+  void Visit(Stmt *S) {
+    if (!S)
+      return;
+    if (auto *DS = dyn_cast<DeclStmt>(S)) {
+      for (auto &&D : DS->decls()) {
+        if (auto *VD = dyn_cast<VarDecl>(D)) {
+          if (VD->hasAttr<CUDADeviceAttr>() ||
+              VD->hasAttr<CUDAConstantAttr>()) {
+            llvm::GlobalValue::LinkageTypes Linkage =
+                CGF.CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
+            return CGF.EmitStaticVarDecl(*VD, Linkage);
+          }
+        }
+      }
+    }
+    for (auto &&SS : S->children())
+      Visit(SS);
+  }
+  void runOn(const FunctionDecl *FD) {
+    assert(CGF.getLangOpts().CUDAIsDevice);
+    assert(!FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAGlobalAttr>());
+    assert(FD->hasBody());
+    CGF.CurFuncDecl = FD;
+    Visit(FD->getBody());
+  }
+};
+
 CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
                              const PreprocessorOptions &PPO,
                              const CodeGenOptions &CGO, llvm::Module &M,
@@ -2748,8 +2784,16 @@
           !Global->hasAttr<CUDAConstantAttr>() &&
           !Global->hasAttr<CUDASharedAttr>() &&
           !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
-          !Global->getType()->isCUDADeviceBuiltinTextureType())
+          !Global->getType()->isCUDADeviceBuiltinTextureType()) {
+        if (auto *FD = dyn_cast<FunctionDecl>(Global)) {
+          if (FD->hasBody()) {
+            // Emit static device or constant variables for host functions.
+            CUDAStaticDeviceVarEmitter E(*this);
+            E.runOn(FD);
+          }
+        }
         return;
+      }
     } else {
       // We need to emit host-side 'shadows' for all global
       // device-side variables because the CUDA runtime needs their
Index: clang/lib/CodeGen/CGDecl.cpp
===================================================================
--- clang/lib/CodeGen/CGDecl.cpp
+++ clang/lib/CodeGen/CGDecl.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGBlocks.h"
+#include "CGCUDARuntime.h"
 #include "CGCXXABI.h"
 #include "CGCleanup.h"
 #include "CGDebugInfo.h"
@@ -414,15 +415,41 @@
   llvm::GlobalVariable *var =
     cast<llvm::GlobalVariable>(addr->stripPointerCasts());
 
+  // CUDA/HIP: need to register static device variable declared in host
+  // or host device functions.
+  if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice && CurFuncDecl) {
+    if (auto *FD = dyn_cast<FunctionDecl>(CurFuncDecl)) {
+      if (!FD->hasAttr<CUDAGlobalAttr>() &&
+          (!FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAHostAttr>()))
+        CGM.getCUDARuntime().handleVarRegistration(&D, *var);
+    }
+  }
+
   // CUDA's local and local static __shared__ variables should not
   // have any non-empty initializers. This is ensured by Sema.
   // Whatever initializer such variable may have when it gets here is
   // a no-op and should not be emitted.
   bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
                          D.hasAttr<CUDASharedAttr>();
-  // If this value has an initializer, emit it.
-  if (D.getInit() && !isCudaSharedVar)
+  // HIP static managed variables need to be emitted as declarations in device
+  // compilation in host or host device functions.
+  bool isUndefManagedVar = false;
+  if (getLangOpts().CUDAIsDevice && D.hasAttr<HIPManagedAttr>() &&
+      CurFuncDecl) {
+    if (auto *FD = dyn_cast<FunctionDecl>(CurFuncDecl)) {
+      if (!FD->hasAttr<CUDAGlobalAttr>() &&
+          (!FD->hasAttr<CUDADeviceAttr>() || FD->hasAttr<CUDAHostAttr>())) {
+        isUndefManagedVar = true;
+      }
+    }
+  }
+  if (isUndefManagedVar) {
+    var->setInitializer(nullptr);
+    var->setLinkage(llvm::GlobalValue::ExternalLinkage);
+  } else if (D.getInit() && !isCudaSharedVar) {
+    // If this value has an initializer, emit it.
     var = AddInitializerToStaticVarDecl(D, var);
+  }
 
   var->setAlignment(alignment.getAsAlign());
 
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -11438,9 +11438,9 @@
          ((D->hasAttr<CUDADeviceAttr>() &&
            !D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
           (D->hasAttr<CUDAConstantAttr>() &&
-           !D->getAttr<CUDAConstantAttr>()->isImplicit())) &&
-         isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() &&
-         cast<VarDecl>(D)->getStorageClass() == SC_Static;
+           !D->getAttr<CUDAConstantAttr>()->isImplicit()) ||
+          D->hasAttr<HIPManagedAttr>()) &&
+         isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
 }
 
 bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to