jhuber6 updated this revision to Diff 442356.
jhuber6 added a comment.
Addressing some comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D128914/new/
https://reviews.llvm.org/D128914
Files:
clang/test/Driver/linker-wrapper-image.c
clang/test/Driver/linker-wrapper.c
clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
clang/tools/clang-linker-wrapper/OffloadWrapper.h
Index: clang/tools/clang-linker-wrapper/OffloadWrapper.h
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.h
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.h
@@ -21,4 +21,8 @@
/// registers the images with the CUDA runtime.
llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef<char> Images);
+/// Wraps the input bundled image into the module \p M as global symbols and
+/// registers the images with the HIP runtime.
+llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef<char> Images);
+
#endif
Index: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
@@ -22,6 +22,7 @@
namespace {
/// Magic number that begins the section containing the CUDA fatbinary.
constexpr unsigned CudaFatMagic = 0x466243b1;
+constexpr unsigned HIPFatMagic = 0x48495046;
/// Copied from clang/CGCudaRuntime.h.
enum OffloadEntryKindFlag : uint32_t {
@@ -288,14 +289,15 @@
/// Embed the image \p Image into the module \p M so it can be found by the
/// runtime.
-GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image) {
+GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP) {
LLVMContext &C = M.getContext();
llvm::Type *Int8PtrTy = Type::getInt8PtrTy(C);
llvm::Triple Triple = llvm::Triple(M.getTargetTriple());
// Create the global string containing the fatbinary.
StringRef FatbinConstantSection =
- Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+ IsHIP ? ".hip_fatbin"
+ : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
auto *Data = ConstantDataArray::get(C, Image);
auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
GlobalVariable::InternalLinkage, Data,
@@ -303,10 +305,11 @@
Fatbin->setSection(FatbinConstantSection);
// Create the fatbinary wrapper
- StringRef FatbinWrapperSection =
- Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
+ StringRef FatbinWrapperSection = IsHIP ? ".hipFatBinSegment"
+ : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
+ : ".nvFatBinSegment";
Constant *FatbinWrapper[] = {
- ConstantInt::get(Type::getInt32Ty(C), CudaFatMagic),
+ ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
ConstantInt::get(Type::getInt32Ty(C), 1),
ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy),
ConstantPointerNull::get(Type::getInt8PtrTy(C))};
@@ -328,9 +331,10 @@
ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
auto *DummyEntry = new GlobalVariable(
M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage, DummyInit,
- "__dummy.cuda_offloading.entry");
- DummyEntry->setSection("cuda_offloading_entries");
+ IsHIP ? "__dummy.hip_offloading.entry" : "__dummy.cuda_offloading.entry");
DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
+ DummyEntry->setSection(IsHIP ? "hip_offloading_entries"
+ : "cuda_offloading_entries");
return FatbinDesc;
}
@@ -358,7 +362,7 @@
/// 0, entry->size, 0, 0);
/// }
/// }
-Function *createRegisterGlobalsFunction(Module &M) {
+Function *createRegisterGlobalsFunction(Module &M, bool IsHIP) {
LLVMContext &C = M.getContext();
// Get the __cudaRegisterFunction function declaration.
auto *RegFuncTy = FunctionType::get(
@@ -368,8 +372,8 @@
Type::getInt8PtrTy(C), Type::getInt8PtrTy(C), Type::getInt8PtrTy(C),
Type::getInt8PtrTy(C), Type::getInt32PtrTy(C)},
/*isVarArg*/ false);
- FunctionCallee RegFunc =
- M.getOrInsertFunction("__cudaRegisterFunction", RegFuncTy);
+ FunctionCallee RegFunc = M.getOrInsertFunction(
+ IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
// Get the __cudaRegisterVar function declaration.
auto *RegVarTy = FunctionType::get(
@@ -378,25 +382,31 @@
Type::getInt8PtrTy(C), Type::getInt8PtrTy(C), Type::getInt32Ty(C),
getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)},
/*isVarArg*/ false);
- FunctionCallee RegVar = M.getOrInsertFunction("__cudaRegisterVar", RegVarTy);
+ FunctionCallee RegVar = M.getOrInsertFunction(
+ IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
// Create the references to the start / stop symbols defined by the linker.
- auto *EntriesB = new GlobalVariable(
- M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true,
- GlobalValue::ExternalLinkage,
- /*Initializer*/ nullptr, "__start_cuda_offloading_entries");
+ auto *EntriesB =
+ new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0),
+ /*isConstant*/ true, GlobalValue::ExternalLinkage,
+ /*Initializer*/ nullptr,
+ IsHIP ? "__start_hip_offloading_entries"
+ : "__start_cuda_offloading_entries");
EntriesB->setVisibility(GlobalValue::HiddenVisibility);
- auto *EntriesE = new GlobalVariable(
- M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true,
- GlobalValue::ExternalLinkage,
- /*Initializer*/ nullptr, "__stop_cuda_offloading_entries");
+ auto *EntriesE =
+ new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0),
+ /*isConstant*/ true, GlobalValue::ExternalLinkage,
+ /*Initializer*/ nullptr,
+ IsHIP ? "__stop_hip_offloading_entries"
+ : "__stop_cuda_offloading_entries");
EntriesE->setVisibility(GlobalValue::HiddenVisibility);
auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C),
Type::getInt8PtrTy(C)->getPointerTo(),
/*isVarArg*/ false);
- auto *RegGlobalsFn = Function::Create(
- RegGlobalsTy, GlobalValue::InternalLinkage, ".cuda.globals_reg", &M);
+ auto *RegGlobalsFn =
+ Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
+ IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
RegGlobalsFn->setSection(".text.startup");
// Create the loop to register all the entries.
@@ -502,24 +512,27 @@
// Create the constructor and destructor to register the fatbinary with the CUDA
// runtime.
-void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc) {
+void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
+ bool IsHIP) {
LLVMContext &C = M.getContext();
auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
- auto *CtorFunc = Function::Create(CtorFuncTy, GlobalValue::InternalLinkage,
- ".cuda.fatbin_reg", &M);
+ auto *CtorFunc =
+ Function::Create(CtorFuncTy, GlobalValue::InternalLinkage,
+ IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg", &M);
CtorFunc->setSection(".text.startup");
auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
- auto *DtorFunc = Function::Create(DtorFuncTy, GlobalValue::InternalLinkage,
- ".cuda.fatbin_unreg", &M);
+ auto *DtorFunc =
+ Function::Create(DtorFuncTy, GlobalValue::InternalLinkage,
+ IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg", &M);
DtorFunc->setSection(".text.startup");
// Get the __cudaRegisterFatBinary function declaration.
auto *RegFatTy = FunctionType::get(Type::getInt8PtrTy(C)->getPointerTo(),
Type::getInt8PtrTy(C),
/*isVarArg*/ false);
- FunctionCallee RegFatbin =
- M.getOrInsertFunction("__cudaRegisterFatBinary", RegFatTy);
+ FunctionCallee RegFatbin = M.getOrInsertFunction(
+ IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
// Get the __cudaRegisterFatBinaryEnd function declaration.
auto *RegFatEndTy = FunctionType::get(Type::getVoidTy(C),
Type::getInt8PtrTy(C)->getPointerTo(),
@@ -530,8 +543,9 @@
auto *UnregFatTy = FunctionType::get(Type::getVoidTy(C),
Type::getInt8PtrTy(C)->getPointerTo(),
/*isVarArg*/ false);
- FunctionCallee UnregFatbin =
- M.getOrInsertFunction("__cudaUnregisterFatBinary", UnregFatTy);
+ FunctionCallee UnregFatbin = M.getOrInsertFunction(
+ IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
+ UnregFatTy);
auto *AtExitTy =
FunctionType::get(Type::getInt32Ty(C), DtorFuncTy->getPointerTo(),
@@ -542,7 +556,7 @@
M, Type::getInt8PtrTy(C)->getPointerTo(), false,
llvm::GlobalValue::InternalLinkage,
llvm::ConstantPointerNull::get(Type::getInt8PtrTy(C)->getPointerTo()),
- ".cuda.binary_handle");
+ IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle");
// Create the constructor to register this image with the runtime.
IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
@@ -552,8 +566,9 @@
CtorBuilder.CreateAlignedStore(
Handle, BinaryHandleGlobal,
Align(M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C))));
- CtorBuilder.CreateCall(createRegisterGlobalsFunction(M), Handle);
- CtorBuilder.CreateCall(RegFatbinEnd, Handle);
+ CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP), Handle);
+ if (!IsHIP)
+ CtorBuilder.CreateCall(RegFatbinEnd, Handle);
CtorBuilder.CreateCall(AtExit, DtorFunc);
CtorBuilder.CreateRetVoid();
@@ -584,11 +599,21 @@
}
Error wrapCudaBinary(Module &M, ArrayRef<char> Image) {
- GlobalVariable *Desc = createFatbinDesc(M, Image);
+ GlobalVariable *Desc = createFatbinDesc(M, Image, /* IsHIP */ false);
+ if (!Desc)
+ return createStringError(inconvertibleErrorCode(),
+ "No fatinbary section created.");
+
+ createRegisterFatbinFunction(M, Desc, /* IsHIP */ false);
+ return Error::success();
+}
+
+Error wrapHIPBinary(Module &M, ArrayRef<char> Image) {
+ GlobalVariable *Desc = createFatbinDesc(M, Image, /* IsHIP */ true);
if (!Desc)
return createStringError(inconvertibleErrorCode(),
"No fatinbary section created.");
- createRegisterFatbinFunction(M, Desc);
+ createRegisterFatbinFunction(M, Desc, /* IsHIP */ true);
return Error::success();
}
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -596,6 +596,49 @@
return *TempFileOrErr;
}
+
+Expected<StringRef>
+fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
+ Triple TheTriple) {
+ // AMDGPU uses the clang-offload-bundler to bundle the linked images.
+ Expected<std::string> OffloadBundlerPath = findProgram(
+ "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
+ if (!OffloadBundlerPath)
+ return OffloadBundlerPath.takeError();
+
+ // Create a new file to write the linked device image to.
+ auto TempFileOrErr =
+ createOutputFile(sys::path::filename(ExecutableName) + "-device-" +
+ TheTriple.getArchName(),
+ "hipfb");
+ if (!TempFileOrErr)
+ return TempFileOrErr.takeError();
+
+ BumpPtrAllocator Alloc;
+ StringSaver Saver(Alloc);
+
+ SmallVector<StringRef, 16> CmdArgs;
+ CmdArgs.push_back(*OffloadBundlerPath);
+ CmdArgs.push_back("-type=o");
+ CmdArgs.push_back("-bundle-align=4096");
+
+ SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"};
+ for (const auto &FileAndArch : InputFiles)
+ Targets.push_back(
+ Saver.save("hipv4-amdgcn-amd-amdhsa--" + std::get<1>(FileAndArch)));
+ CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
+
+ CmdArgs.push_back("-input=/dev/null");
+ for (const auto &FileAndArch : InputFiles)
+ CmdArgs.push_back(Saver.save("-input=" + std::get<0>(FileAndArch)));
+
+ CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr));
+
+ if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs))
+ return std::move(Err);
+
+ return *TempFileOrErr;
+}
} // namespace amdgcn
namespace generic {
@@ -1108,6 +1151,10 @@
if (Error Err = wrapCudaBinary(M, BuffersToWrap.front()))
return std::move(Err);
break;
+ case OFK_HIP:
+ if (Error Err = wrapHIPBinary(M, BuffersToWrap.front()))
+ return std::move(Err);
+ break;
default:
return createStringError(inconvertibleErrorCode(),
getOffloadKindName(Kind) +
@@ -1135,7 +1182,6 @@
Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
bundleCuda(ArrayRef<OffloadingImage> Images) {
- SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
for (const OffloadingImage &Image : Images)
@@ -1149,6 +1195,31 @@
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
+
+ SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+ if (std::error_code EC = ImageOrError.getError())
+ return createFileError(*FileOrErr, EC);
+ Buffers.emplace_back(std::move(*ImageOrError));
+
+ return std::move(Buffers);
+}
+
+Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
+bundleHIP(ArrayRef<OffloadingImage> Images) {
+ SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
+ for (const OffloadingImage &Image : Images)
+ InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
+ Image.StringData.lookup("arch")));
+
+ Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
+ auto FileOrErr = amdgcn::fatbinary(InputFiles, TheTriple);
+ if (!FileOrErr)
+ return FileOrErr.takeError();
+
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
+ llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
+
+ SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
if (std::error_code EC = ImageOrError.getError())
return createFileError(*FileOrErr, EC);
Buffers.emplace_back(std::move(*ImageOrError));
@@ -1165,6 +1236,8 @@
return bundleOpenMP(Images);
case OFK_Cuda:
return bundleCuda(Images);
+ case OFK_HIP:
+ return bundleHIP(Images);
default:
return createStringError(inconvertibleErrorCode(),
getOffloadKindName(Kind) +
Index: clang/test/Driver/linker-wrapper.c
===================================================================
--- clang/test/Driver/linker-wrapper.c
+++ clang/test/Driver/linker-wrapper.c
@@ -81,6 +81,19 @@
// CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o
// CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_52,file={{.*}}.out --image=profile=sm_70,file={{.*}}.out
+// RUN: clang-offload-packager -o %t.out \
+// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN: -fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --dry-run --host-triple x86_64-unknown-linux-gnu -linker-path \
+// RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
+
+// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o
+// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o
+// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a -input=/dev/null -input={{.*}}.out -input={{.*}}out -output={{.*}}.hipfb
+
// RUN: clang-offload-packager -o %t.out \
// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -93,6 +106,7 @@
// LINKER_ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o a
// LINKER_ARGS: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o a b
+/// Ensure that temp files aren't leftover from static libraries.
// RUN: clang-offload-packager -o %t-lib.out \
// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
// RUN: --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52
Index: clang/test/Driver/linker-wrapper-image.c
===================================================================
--- clang/test/Driver/linker-wrapper-image.c
+++ clang/test/Driver/linker-wrapper-image.c
@@ -77,7 +77,6 @@
// CUDA-NEXT: %5 = icmp eq i64 %size, 0
// CUDA-NEXT: br i1 %5, label %if.then, label %if.else
-
// CUDA: if.then:
// CUDA-NEXT: %6 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// CUDA-NEXT: br label %if.end
@@ -111,3 +110,84 @@
// CUDA: while.end:
// CUDA-NEXT: ret void
// CUDA-NEXT: }
+
+// RUN: clang-offload-packager -o %t.out --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN: -fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple x86_64-unknown-linux-gnu \
+// RUN: -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
+
+// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-NEXT: @__dummy.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"
+// HIP-NEXT: @.hip.binary_handle = internal global ptr null
+// HIP-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
+// HIP-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
+// HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }]
+
+// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
+// HIP-NEXT: call void @.hip.globals_reg(ptr %0)
+// HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+// HIP-NEXT: ret void
+// HIP-NEXT: }
+
+// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
+// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
+// HIP-NEXT: ret void
+// HIP-NEXT: }
+
+// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT: br i1 icmp ne (ptr @__start_hip_offloading_entries, ptr @__stop_hip_offloading_entries), label %while.entry, label %while.end
+
+// HIP: while.entry:
+// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %7, %if.end ]
+// HIP-NEXT: %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0
+// HIP-NEXT: %addr = load ptr, ptr %1, align 8
+// HIP-NEXT: %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1
+// HIP-NEXT: %name = load ptr, ptr %2, align 8
+// HIP-NEXT: %3 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 2
+// HIP-NEXT: %size = load i64, ptr %3, align 4
+// HIP-NEXT: %4 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 3
+// HIP-NEXT: %flag = load i32, ptr %4, align 4
+// HIP-NEXT: %5 = icmp eq i64 %size, 0
+// HIP-NEXT: br i1 %5, label %if.then, label %if.else
+
+// HIP: if.then:
+// HIP-NEXT: %6 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
+// HIP-NEXT: br label %if.end
+
+// HIP: if.else:
+// HIP-NEXT: switch i32 %flag, label %if.end [
+// HIP-NEXT: i32 0, label %sw.global
+// HIP-NEXT: i32 1, label %sw.managed
+// HIP-NEXT: i32 2, label %sw.surface
+// HIP-NEXT: i32 3, label %sw.texture
+// HIP-NEXT: ]
+
+// HIP: sw.global:
+// HIP-NEXT: call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 0, i64 %size, i32 0, i32 0)
+// HIP-NEXT: br label %if.end
+
+// HIP: sw.managed:
+// HIP-NEXT: br label %if.end
+
+// HIP: sw.surface:
+// HIP-NEXT: br label %if.end
+
+// HIP: sw.texture:
+// HIP-NEXT: br label %if.end
+
+// HIP: if.end:
+// HIP-NEXT: %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1
+// HIP-NEXT: %8 = icmp eq ptr %7, @__stop_hip_offloading_entries
+// HIP-NEXT: br i1 %8, label %while.end, label %while.entry
+
+// HIP: while.end:
+// HIP-NEXT: ret void
+// HIP-NEXT: }
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits