yaxunl updated this revision to Diff 277272.
yaxunl marked 3 inline comments as done.
yaxunl added a comment.
Only allow cuid to be alphanumeric and underscore.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D80858/new/
https://reviews.llvm.org/D80858
Files:
clang/include/clang/AST/ASTContext.h
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Basic/LangOptions.h
clang/include/clang/Driver/Action.h
clang/include/clang/Driver/Compilation.h
clang/include/clang/Driver/Options.td
clang/lib/AST/ASTContext.cpp
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
clang/lib/Driver/Action.cpp
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Frontend/CompilerInvocation.cpp
clang/test/CodeGenCUDA/static-device-var.cu
clang/test/Driver/hip-cuid.hip
clang/test/Frontend/hip-cuid.hip
clang/test/SemaCUDA/static-device-var.cu
Index: clang/test/SemaCUDA/static-device-var.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/static-device-var.cu
@@ -0,0 +1,37 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \
+// RUN: -emit-llvm -o - %s -fsyntax-only -verify
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN: -emit-llvm -o - %s -fsyntax-only -verify
+
+#include "Inputs/cuda.h"
+
+__device__ void f1() {
+ const static int b = 123;
+ static int a;
+ // expected-error@-1 {{within a __device__ function, only __shared__ variables or const variables without device memory qualifier may be marked 'static'}}
+}
+
+__global__ void k1() {
+ const static int b = 123;
+ static int a;
+ // expected-error@-1 {{within a __global__ function, only __shared__ variables or const variables without device memory qualifier may be marked 'static'}}
+}
+
+static __device__ int x;
+static __constant__ int y;
+
+__global__ void kernel(int *a) {
+ a[0] = x;
+ a[1] = y;
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+ getDeviceSymbol(&x);
+ getDeviceSymbol(&y);
+}
Index: clang/test/Frontend/hip-cuid.hip
===================================================================
--- /dev/null
+++ clang/test/Frontend/hip-cuid.hip
@@ -0,0 +1,6 @@
+// RUN: not %clang_cc1 -cuid=abc-123 -offload-arch=gfx906 %s 2>&1 \
+// RUN: | FileCheck --check-prefix=INVALID %s
+
+// INVALID: invalid value 'abc-123' in '-cuid=abc-123' (alphanumeric characters and underscore only)
+
+// RUN: %clang_cc1 -cuid=abc_123 -offload-arch=gfx906 %s
Index: clang/test/Driver/hip-cuid.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/hip-cuid.hip
@@ -0,0 +1,130 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Check invalid -fuse-cuid= option.
+
+// RUN: not %clang -### -x hip \
+// RUN: -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx900 \
+// RUN: --offload-arch=gfx906 \
+// RUN: -c -nogpulib -fuse-cuid=invalid \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=INVALID %s
+
+// INVALID: invalid value 'invalid' in '-fuse-cuid=invalid'
+
+// Check random CUID generator.
+
+// RUN: %clang -### -x hip \
+// RUN: -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx900 \
+// RUN: --offload-arch=gfx906 \
+// RUN: -c -nogpulib -fuse-cuid=random \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s
+
+// Check fixed CUID.
+
+// RUN: %clang -### -x hip \
+// RUN: -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx900 \
+// RUN: --offload-arch=gfx906 \
+// RUN: -c -nogpulib -cuid=xyz_123 \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s
+
+// Check fixed CUID override -fuse-cuid.
+
+// RUN: %clang -### -x hip \
+// RUN: -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx900 \
+// RUN: --offload-arch=gfx906 \
+// RUN: -c -nogpulib -fuse-cuid=random -cuid=xyz_123 \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,FIXED %s
+
+// Check hash CUID generator.
+
+// RUN: %clang -### -x hip \
+// RUN: -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx900 \
+// RUN: --offload-arch=gfx906 \
+// RUN: -c -nogpulib -fuse-cuid=hash \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=COMMON,HEX %s
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx900"
+// HEX-SAME: "-cuid=[[CUID:[0-9a-f]+]]"
+// FIXED-SAME: "-cuid=[[CUID:xyz_123]]"
+// COMMON-SAME: "{{.*}}a.cu"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx906"
+// COMMON-SAME: "-cuid=[[CUID]]"
+// COMMON-SAME: "{{.*}}a.cu"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// COMMON-SAME: "-cuid=[[CUID]]"
+// COMMON-SAME: "{{.*}}a.cu"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx900"
+// HEX-NOT: "-cuid=[[CUID]]"
+// HEX-SAME: "-cuid=[[CUID2:[0-9a-f]+]]"
+// FIXED-SAME: "-cuid=[[CUID2:xyz_123]]"
+// COMMON-SAME: "{{.*}}b.hip"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-target-cpu" "gfx906"
+// HEX-NOT: "-cuid=[[CUID]]"
+// COMMON-SAME: "-cuid=[[CUID2]]"
+// COMMON-SAME: "{{.*}}b.hip"
+
+// COMMON: "{{.*}}clang{{.*}}" "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HEX-NOT: "-cuid=[[CUID]]"
+// COMMON-SAME: "-cuid=[[CUID2]]"
+// COMMON-SAME: "{{.*}}b.hip"
+
+// Check CUID generated by hash.
+// The same CUID is generated for the same file with the same options.
+
+// RUN: rm -rf %t.out
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu \
+// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1
+
+// RUN: FileCheck %s -check-prefixes=HASH -input-file %t.out
+
+// HASH: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID:[0-9a-f]+]]"
+// HASH: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID]]"
+
+
+// Check CUID generated by hash.
+// Different CUID's are generated for the same file with different options.
+
+// RUN: rm -rf %t.out
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu -DX=1 \
+// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu >%t.out 2>&1
+
+// RUN: %clang -### -x hip -target x86_64-unknown-linux-gnu -DX=2 \
+// RUN: --offload-arch=gfx906 -c -nogpulib -fuse-cuid=hash \
+// RUN: %S/Inputs/../Inputs/hip_multiple_inputs/a.cu >>%t.out 2>&1
+
+// RUN: FileCheck %s -check-prefixes=HASH2 -input-file %t.out
+
+// HASH2: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID:[0-9a-f]+]]"
+// HASH2-NOT: "{{.*}}clang{{.*}}" {{.*}} "-target-cpu" "gfx906" {{.*}}"-cuid=[[CUID]]"
Index: clang/test/CodeGenCUDA/static-device-var.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/static-device-var.cu
@@ -0,0 +1,84 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefixes=DEV,INT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefixes=HOST,INT-HOST %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=123abc \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefixes=DEV,EXT-DEV %s
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=123abc \
+// RUN: -emit-llvm -o - -x hip %s | FileCheck \
+// RUN: -check-prefixes=HOST,EXT-HOST %s
+
+#include "Inputs/cuda.h"
+
+// Test function scope static device variable, which should not be externalized.
+// DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1x = internal addrspace(1) global i32 0
+// INT-HOST-DAG: @_ZL1x = internal global i32 undef
+// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1x.static.123abc = addrspace(1) externally_initialized global i32 0
+// EXT-HOST-DAG: @_ZL1x.static.123abc = internal global i32 undef
+// EXT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.123abc\00"
+
+static __device__ int x;
+
+// Test normal static device variables
+// INT-DEV-DAG: @_ZL1y = internal addrspace(4) global i32 0
+// INT-HOST-DAG: @_ZL1y = internal global i32 undef
+// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
+
+// Test externalized static device variables
+// EXT-DEV-DAG: @_ZL1y.static.123abc = addrspace(4) externally_initialized global i32 0
+// EXT-HOST-DAG: @_ZL1y.static.123abc = internal global i32 undef
+// EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.123abc\00"
+
+static __constant__ int y;
+
+// Test static host variable, which should not be externalized nor registered.
+// HOST-DAG: @_ZL1z = internal global i32 0
+// DEV-NOT: @_ZL1z
+static int z;
+
+// Test static device variable in inline function, which should not be
+// externalized nor registered.
+// DEV-DAG: @_ZZ6devfunPPKiE1p = linkonce_odr addrspace(4) constant i32 2, comdat
+
+inline __device__ void devfun(const int ** b) {
+ const static int p = 2;
+ b[0] = &p;
+}
+
+__global__ void kernel(int *a, const int **b) {
+ const static int w = 1;
+ a[0] = x;
+ a[1] = y;
+ b[0] = &w;
+ devfun(b);
+}
+
+int* getDeviceSymbol(int *x);
+
+void foo() {
+ getDeviceSymbol(&x);
+ getDeviceSymbol(&y);
+ z = 123;
+}
+
+// INT-HOST: __hipRegisterVar({{.*}}@_ZL1x{{.*}}@[[DEVNAMEX]]
+// INT-HOST: __hipRegisterVar({{.*}}@_ZL1y{{.*}}@[[DEVNAMEY]]
+// EXT-HOST: __hipRegisterVar({{.*}}@_ZL1x.static.123abc{{.*}}@[[DEVNAMEX]]
+// EXT-HOST: __hipRegisterVar({{.*}}@_ZL1y.static.123abc{{.*}}@[[DEVNAMEY]]
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6kernelPiPPKiE1w
+// HOST-NOT: __hipRegisterVar({{.*}}@_ZZ6devfunPPKiE1p
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2629,6 +2629,21 @@
<< Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args);
}
Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api);
+
+ // Only alphanumeric and underscore is allowed in -cuid option.
+ if (auto *A = Args.getLastArg(OPT_cuid_EQ)) {
+ const char *V = A->getValue();
+ bool IsValid = true;
+ for (const char *P = V; *P; ++P) {
+ if (!std::isalnum(*P) && *P != '_') {
+ Diags.Report(diag::err_drv_invalid_cuid) << A->getAsString(Args) << V;
+ IsValid = false;
+ break;
+ }
+ }
+ if (IsValid)
+ Opts.CUID = std::string(V);
+ }
if (Opts.HIP)
Opts.GPUMaxThreadsPerBlock = getLastArgIntValue(
Args, OPT_gpu_max_threads_per_block_EQ, Opts.GPUMaxThreadsPerBlock);
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6006,6 +6006,18 @@
CmdArgs.push_back("-fcuda-short-ptr");
}
+ if (IsCuda || IsHIP) {
+ // Determine the original source input.
+ const Action *SourceAction = &JA;
+ while (SourceAction->getKind() != Action::InputClass) {
+ assert(!SourceAction->getInputs().empty() && "unexpected root action!");
+ SourceAction = SourceAction->getInputs()[0];
+ }
+ auto CUID = cast<InputAction>(SourceAction)->getId();
+ if (!CUID.empty())
+ CmdArgs.push_back(Args.MakeArgString(Twine("-cuid=") + Twine(CUID)));
+ }
+
if (IsHIP)
CmdArgs.push_back("-fcuda-allow-variadic-functions");
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -73,6 +73,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
@@ -2402,6 +2403,14 @@
/// Default GPU architecture if there's no one specified.
CudaArch DefaultCudaArch = CudaArch::UNKNOWN;
+ /// Method to generate compilation unit ID specified by option
+ /// '-fuse-cuid='.
+ enum UseCUIDKind { CUID_Hash, CUID_Random, CUID_None, CUID_Invalid };
+ UseCUIDKind UseCUID = CUID_Hash;
+
+ /// Compilation unit ID specified by option '-cuid='.
+ StringRef FixedCUID;
+
public:
CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs,
@@ -2437,9 +2446,32 @@
// Replicate inputs for each GPU architecture.
auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
: types::TY_CUDA_DEVICE;
+ std::string CUID = FixedCUID.str();
+ if (CUID.empty()) {
+ if (UseCUID == CUID_Random)
+ CUID = llvm::utohexstr(llvm::sys::Process::GetRandomNumber(),
+ /*LowerCase=*/true);
+ else if (UseCUID == CUID_Hash) {
+ llvm::MD5 Hasher;
+ llvm::MD5::MD5Result Hash;
+ SmallString<256> RealPath;
+ llvm::sys::fs::real_path(IA->getInputArg().getValue(), RealPath,
+ /*expand_tilde=*/true);
+ Hasher.update(RealPath);
+ for (auto *A : Args) {
+ if (A->getOption().matches(options::OPT_INPUT))
+ continue;
+ Hasher.update(A->getAsString(Args));
+ }
+ Hasher.final(Hash);
+ CUID = llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
+ }
+ }
+ IA->setId(CUID);
+
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
CudaDeviceActions.push_back(
- C.MakeAction<InputAction>(IA->getInputArg(), Ty));
+ C.MakeAction<InputAction>(IA->getInputArg(), Ty, IA->getId()));
}
return ABRT_Success;
@@ -2555,6 +2587,21 @@
options::OPT_cuda_device_only);
EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
EmitAsm = Args.getLastArg(options::OPT_S);
+ FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ);
+ if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) {
+ StringRef UseCUIDStr = A->getValue();
+ UseCUID = llvm::StringSwitch<UseCUIDKind>(UseCUIDStr)
+ .Case("hash", CUID_Hash)
+ .Case("random", CUID_Random)
+ .Case("none", CUID_None)
+ .Default(CUID_Invalid);
+ if (UseCUID == CUID_Invalid) {
+ C.getDriver().Diag(diag::err_drv_invalid_value)
+ << A->getAsString(Args) << UseCUIDStr;
+ C.setContainsError();
+ return true;
+ }
+ }
// Collect all cuda_gpu_arch parameters, removing duplicates.
std::set<CudaArch> GpuArchs;
Index: clang/lib/Driver/Action.cpp
===================================================================
--- clang/lib/Driver/Action.cpp
+++ clang/lib/Driver/Action.cpp
@@ -165,8 +165,8 @@
void InputAction::anchor() {}
-InputAction::InputAction(const Arg &_Input, types::ID _Type)
- : Action(InputClass, _Type), Input(_Input) {}
+InputAction::InputAction(const Arg &_Input, types::ID _Type, StringRef _Id)
+ : Action(InputClass, _Type), Input(_Input), Id(_Id.str()) {}
void BindArchAction::anchor() {}
Index: clang/lib/CodeGen/CodeGenModule.h
===================================================================
--- clang/lib/CodeGen/CodeGenModule.h
+++ clang/lib/CodeGen/CodeGenModule.h
@@ -1406,6 +1406,10 @@
TBAAAccessInfo *TBAAInfo = nullptr);
bool stopAutoInit();
+ /// Print the postfix for externalized static variable for single source
+ /// offloading languages CUDA and HIP.
+ void printPostfixForExternalizedStaticVar(llvm::raw_ostream &OS) const;
+
private:
llvm::Constant *GetOrCreateLLVMFunction(
StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable,
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -1083,6 +1083,9 @@
}
}
+ // Make unique name for device side static file-scope variable for HIP.
+ if (CGM.getContext().shouldExternalizeStaticVar(ND))
+ CGM.printPostfixForExternalizedStaticVar(Out);
return std::string(Out.str());
}
@@ -6052,3 +6055,8 @@
}
return false;
}
+
+void CodeGenModule::printPostfixForExternalizedStaticVar(
+ llvm::raw_ostream &OS) const {
+ OS << ".static." << getLangOpts().CUID;
+}
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -234,6 +234,15 @@
DeviceSideName = std::string(Out.str());
} else
DeviceSideName = std::string(ND->getIdentifier()->getName());
+
+ // Make unique name for device side static file-scope variable for HIP.
+ if (CGM.getContext().shouldExternalizeStaticVar(ND)) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << DeviceSideName;
+ CGM.printPostfixForExternalizedStaticVar(Out);
+ DeviceSideName = std::string(Out.str());
+ }
return DeviceSideName;
}
Index: clang/lib/AST/ASTContext.cpp
===================================================================
--- clang/lib/AST/ASTContext.cpp
+++ clang/lib/AST/ASTContext.cpp
@@ -10273,12 +10273,20 @@
} else if (D->hasAttr<DLLExportAttr>()) {
if (L == GVA_DiscardableODR)
return GVA_StrongODR;
- } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice &&
- D->hasAttr<CUDAGlobalAttr>()) {
+ } else if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) {
// Device-side functions with __global__ attribute must always be
// visible externally so they can be launched from host.
- if (L == GVA_DiscardableODR || L == GVA_Internal)
+ if (D->hasAttr<CUDAGlobalAttr>() &&
+ (L == GVA_DiscardableODR || L == GVA_Internal))
return GVA_StrongODR;
+ // Single source offloading languages like CUDA/HIP need to be able to
+ // access static device variables from host code of the same compilation
+ // unit. This is done by externalizing the static variable with a shared
+ // name between the host and device compilation which is the same for the
+ // same compilation unit whereas different among different compilation
+ // units.
+ if (Context.shouldExternalizeStaticVar(D))
+ return GVA_StrongExternal;
}
return L;
}
@@ -11164,3 +11172,10 @@
return DB << Section.Decl;
return DB << "a prior #pragma section";
}
+
+bool ASTContext::shouldExternalizeStaticVar(const Decl *D) const {
+ return !getLangOpts().CUID.empty() &&
+ (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) &&
+ isa<VarDecl>(D) && cast<VarDecl>(D)->isFileVarDecl() &&
+ cast<VarDecl>(D)->getStorageClass() == SC_Static;
+}
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -653,6 +653,18 @@
def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">,
Flags<[CC1Option]>,
HelpText<"Default max threads per block for kernel launch bounds for HIP">;
+def cuid_EQ : Joined<["-"], "cuid=">, Flags<[CC1Option]>,
+ HelpText<"An ID for compilation unit, which should be the same for the same "
+ "compilation unit but different for different compilation units. "
+ "It is used to externalize device-side static variables for single "
+ "source offloading languages CUDA and HIP so that they can be "
+ "accessed by the host code of the same compilation unit.">;
+def fuse_cuid_EQ : Joined<["-"], "fuse-cuid=">,
+ HelpText<"Method to generate ID's for compilation units for single source "
+ "offloading languages CUDA and HIP: 'hash' (ID's generated by hashing "
+ "file path and command line options) | 'random' (ID's generated as "
+ "random numbers) | 'none' (disabled). Default is 'hash'. This option "
+ "will be overriden by option '-cuid=[ID]' if it is specified." >;
def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group<i_Group>,
HelpText<"Path to libomptarget-nvptx libraries">;
def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
Index: clang/include/clang/Driver/Compilation.h
===================================================================
--- clang/include/clang/Driver/Compilation.h
+++ clang/include/clang/Driver/Compilation.h
@@ -297,6 +297,8 @@
/// Return whether an error during the parsing of the input args.
bool containsError() const { return ContainsError; }
+ void setContainsError() { ContainsError = true; }
+
/// Redirect - Redirect output of this compilation. Can only be done once.
///
/// \param Redirects - array of optional paths. The array should have a size
Index: clang/include/clang/Driver/Action.h
===================================================================
--- clang/include/clang/Driver/Action.h
+++ clang/include/clang/Driver/Action.h
@@ -214,14 +214,18 @@
class InputAction : public Action {
const llvm::opt::Arg &Input;
-
+ std::string Id;
virtual void anchor();
public:
- InputAction(const llvm::opt::Arg &Input, types::ID Type);
+ InputAction(const llvm::opt::Arg &Input, types::ID Type,
+ StringRef Id = StringRef());
const llvm::opt::Arg &getInputArg() const { return Input; }
+ void setId(StringRef _Id) { Id = _Id.str(); }
+ StringRef getId() const { return Id; }
+
static bool classof(const Action *A) {
return A->getKind() == InputClass;
}
Index: clang/include/clang/Basic/LangOptions.h
===================================================================
--- clang/include/clang/Basic/LangOptions.h
+++ clang/include/clang/Basic/LangOptions.h
@@ -293,6 +293,12 @@
/// host code generation.
std::string OMPHostIRFile;
+ /// The user provided compilation unit ID, if non-empty. This is used to
+ /// externalize static variables which is needed to support accessing static
+ /// device variables in host code for single source offloading languages
+ /// like CUDA/HIP.
+ std::string CUID;
+
/// Indicates whether the front-end is explicitly told that the
/// input is a header file (i.e. -x c-header).
bool IsHeaderFile = false;
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -208,6 +208,8 @@
def err_drv_invalid_value : Error<"invalid value '%1' in '%0'">;
def err_drv_invalid_int_value : Error<"invalid integral value '%1' in '%0'">;
+def err_drv_invalid_cuid : Error<"invalid value '%1' in '%0' (alphanumeric characters "
+ " and underscore only)">;
def err_drv_invalid_remap_file : Error<
"invalid option '%0' not of the form <from-file>;<to-file>">;
def err_drv_invalid_gcc_output_type : Error<
Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -3018,6 +3018,9 @@
/// Return a new OMPTraitInfo object owned by this context.
OMPTraitInfo &getNewOMPTraitInfo();
+ /// Whether a C++ static variable should be externalized.
+ bool shouldExternalizeStaticVar(const Decl *D) const;
+
private:
/// All OMPTraitInfo objects live in this collection, one per
/// `pragma omp [begin] declare variant` directive.
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits