jhuber6 updated this revision to Diff 426404.
jhuber6 added a comment.
Fix test.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D123471/new/
https://reviews.llvm.org/D123471
Files:
clang/include/clang/Basic/LangOptions.def
clang/include/clang/Driver/Options.td
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CGCUDARuntime.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/CodeGenCUDA/offloading-entries.cu
Index: clang/test/CodeGenCUDA/offloading-entries.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/offloading-entries.cu
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu \
+// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
+// RUN: --check-prefix=HOST %s
+
+#include "Inputs/cuda.h"
+
+//.
+// HOST: @x = internal global i32 undef, align 4
+// HOST: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
+// HOST: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// HOST: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00"
+// HOST: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+// HOST: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00"
+// HOST: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
+//.
+// HOST-LABEL: @_Z18__device_stub__foov(
+// HOST-NEXT: entry:
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov)
+// HOST-NEXT: br label [[SETUP_END:%.*]]
+// HOST: setup.end:
+// HOST-NEXT: ret void
+//
+__global__ void foo() {}
+// HOST-LABEL: @_Z18__device_stub__barv(
+// HOST-NEXT: entry:
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__barv)
+// HOST-NEXT: br label [[SETUP_END:%.*]]
+// HOST: setup.end:
+// HOST-NEXT: ret void
+//
+__global__ void bar() {}
+__device__ int x = 1;
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6079,6 +6079,10 @@
options::OPT_fno_openmp_extensions);
}
+ // Forward the new driver to change offloading code generation.
+ if (Args.hasArg(options::OPT_offload_new_driver))
+ CmdArgs.push_back("--offload-new-driver");
+
SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
const XRayArgs &XRay = TC.getXRayArgs();
Index: clang/lib/CodeGen/CGCUDARuntime.h
===================================================================
--- clang/lib/CodeGen/CGCUDARuntime.h
+++ clang/lib/CodeGen/CGCUDARuntime.h
@@ -52,6 +52,24 @@
Texture, // Builtin texture
};
+ /// The kind flag of the target region entry.
+ enum OffloadRegionEntryKindFlag : uint32_t {
+ /// Mark the region entry as a kernel.
+ OffloadRegionKernelEntry = 0x0,
+ };
+
+ /// The kind flag of the global variable entry.
+ enum OffloadVarEntryKindFlag : uint32_t {
+ /// Mark the entry as a global variable.
+ OffloadGlobalVarEntry = 0x0,
+ /// Mark the entry as a managed global variable.
+ OffloadGlobalManagedEntry = 0x1,
+ /// Mark the entry as a surface variable.
+ OffloadGlobalSurfaceEntry = 0x2,
+ /// Mark the entry as a texture variable.
+ OffloadGlobalTextureEntry = 0x4,
+ };
+
private:
unsigned Kind : 2;
unsigned Extern : 1;
Index: clang/lib/CodeGen/CGCUDANV.cpp
===================================================================
--- clang/lib/CodeGen/CGCUDANV.cpp
+++ clang/lib/CodeGen/CGCUDANV.cpp
@@ -158,6 +158,8 @@
llvm::Function *makeModuleDtorFunction();
/// Transform managed variables for device compilation.
void transformManagedVars();
+ /// Create offloading entries to register globals in RDC mode.
+ void createOffloadingEntries();
public:
CGNVCUDARuntime(CodeGenModule &CGM);
@@ -211,7 +213,8 @@
CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
TheModule(CGM.getModule()),
- RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
+ RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode ||
+ CGM.getLangOpts().OffloadingNewDriver),
DeviceMC(InitDeviceMC(CGM)) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
@@ -1110,6 +1113,40 @@
}
}
+// Creates offloading entries for all the kernels and globals that must be
+// registered. The linker will provide a pointer to this section so we can
+// register the symbols with the linked device image.
+void CGNVCUDARuntime::createOffloadingEntries() {
+ llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule());
+ OMPBuilder.initialize();
+
+ StringRef Section = "cuda_offloading_entries";
+ for (KernelInfo &I : EmittedKernels)
+ OMPBuilder.emitOffloadingEntry(
+ KernelHandles[I.Kernel], getDeviceSideName(cast<NamedDecl>(I.D)), 0,
+ DeviceVarFlags::OffloadRegionKernelEntry, Section);
+
+ for (VarInfo &I : DeviceVars) {
+ uint64_t VarSize =
+ CGM.getDataLayout().getTypeAllocSize(I.Var->getValueType());
+ if (I.Flags.getKind() == DeviceVarFlags::Variable) {
+ OMPBuilder.emitOffloadingEntry(
+ I.Var, getDeviceSideName(I.D), VarSize,
+ I.Flags.isManaged() ? DeviceVarFlags::OffloadGlobalManagedEntry
+ : DeviceVarFlags::OffloadGlobalVarEntry,
+ Section);
+ } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
+ OMPBuilder.emitOffloadingEntry(I.Var, getDeviceSideName(I.D), VarSize,
+ DeviceVarFlags::OffloadGlobalSurfaceEntry,
+ Section);
+ } else if (I.Flags.getKind() == DeviceVarFlags::Texture) {
+ OMPBuilder.emitOffloadingEntry(I.Var, getDeviceSideName(I.D), VarSize,
+ DeviceVarFlags::OffloadGlobalTextureEntry,
+ Section);
+ }
+ }
+}
+
// Returns module constructor to be added.
llvm::Function *CGNVCUDARuntime::finalizeModule() {
if (CGM.getLangOpts().CUDAIsDevice) {
@@ -1138,7 +1175,11 @@
}
return nullptr;
}
- return makeModuleCtorFunction();
+ if (!(CGM.getLangOpts().OffloadingNewDriver && RelocatableDeviceCode))
+ return makeModuleCtorFunction();
+
+ createOffloadingEntries();
+ return nullptr;
}
llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2526,9 +2526,9 @@
PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;
def static_openmp: Flag<["-"], "static-openmp">,
HelpText<"Use the static host OpenMP runtime while linking.">;
-def offload_new_driver : Flag<["--"], "offload-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
- HelpText<"Use the new driver for offloading compilation.">;
-def no_offload_new_driver : Flag<["--"], "no-offload-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
+def offload_new_driver : Flag<["--"], "offload-new-driver">, Flags<[CC1Option]>, Group<f_Group>,
+ MarshallingInfoFlag<LangOpts<"OffloadingNewDriver">>, HelpText<"Use the new driver for offloading compilation.">;
+def no_offload_new_driver : Flag<["--"], "no-offload-new-driver">, Flags<[CC1Option]>, Group<f_Group>,
HelpText<"Don't Use the new driver for offloading compilation.">;
def offload_device_only : Flag<["--"], "offload-device-only">,
HelpText<"Only compile for the offloading device.">;
@@ -2543,7 +2543,7 @@
def cuda_compile_host_device : Flag<["--"], "cuda-compile-host-device">, Alias<offload_host_device>,
HelpText<"Compile CUDA code for both host and device (default). Has no "
"effect on non-CUDA compilations.">;
-def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
+def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<f_Group>,
HelpText<"Use the new driver for OpenMP offloading.">;
def fno_openmp_new_driver : Flag<["-"], "fno-openmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
Alias<no_offload_new_driver>, HelpText<"Don't use the new driver for OpenMP offloading.">;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -267,6 +267,7 @@
LANGOPT(GPUMaxThreadsPerBlock, 32, 1024, "default max threads per block for kernel launch bounds for HIP")
LANGOPT(GPUDeferDiag, 1, 0, "defer host/device related diagnostic messages for CUDA/HIP")
LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads in overloading resolution for CUDA/HIP")
+LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading code.")
LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits