jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, JonChesterfield, yaxunl, tra.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

This patch adds the changes required to bundle and wrap HIP
files. The bundling is currently done using `clang-offload-bundler` to
mimic `fatbinary`, and the wrapping is done using runtime calls very
similar to CUDA's. This still does not support managed / surface / texture
variables; that would require some additional information in the entry.

One difference in the code generation with AMD is that I don't check if
the handle is null before destructing it; I'm not sure if that's
required.

With this we should be able to support HIP with the new driver.

Depends on D128850 <https://reviews.llvm.org/D128850>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D128914

Files:
  clang/test/Driver/linker-wrapper-image.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h

Index: clang/tools/clang-linker-wrapper/OffloadWrapper.h
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.h
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.h
@@ -21,4 +21,8 @@
 /// registers the images with the CUDA runtime.
 llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef<char> Images);
 
+/// Wraps the input bundled image into the module \p M as global symbols and
+/// registers the images with the HIP runtime.
+llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef<char> Images);
+
 #endif
Index: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
@@ -22,6 +22,7 @@
 namespace {
 /// Magic number that begins the section containing the CUDA fatbinary.
 constexpr unsigned CudaFatMagic = 0x466243b1;
+constexpr unsigned HIPFatMagic = 0x48495046;
 
 /// Copied from clang/CGCudaRuntime.h.
 enum OffloadEntryKindFlag : uint32_t {
@@ -288,14 +289,15 @@
 
 /// Embed the image \p Image into the module \p M so it can be found by the
 /// runtime.
-GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image) {
+GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP) {
   LLVMContext &C = M.getContext();
   llvm::Type *Int8PtrTy = Type::getInt8PtrTy(C);
   llvm::Triple Triple = llvm::Triple(M.getTargetTriple());
 
   // Create the global string containing the fatbinary.
   StringRef FatbinConstantSection =
-      Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+      IsHIP ? ".hip_fatbin"
+            : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
   auto *Data = ConstantDataArray::get(C, Image);
   auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
                                     GlobalVariable::InternalLinkage, Data,
@@ -303,10 +305,11 @@
   Fatbin->setSection(FatbinConstantSection);
 
   // Create the fatbinary wrapper
-  StringRef FatbinWrapperSection =
-      Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
+  StringRef FatbinWrapperSection = IsHIP               ? ".hipFatBinSegment"
+                                   : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
+                                                       : ".nvFatBinSegment";
   Constant *FatbinWrapper[] = {
-      ConstantInt::get(Type::getInt32Ty(C), CudaFatMagic),
+      ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
       ConstantInt::get(Type::getInt32Ty(C), 1),
       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy),
       ConstantPointerNull::get(Type::getInt8PtrTy(C))};
@@ -328,9 +331,10 @@
       ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
   auto *DummyEntry = new GlobalVariable(
       M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage, DummyInit,
-      "__dummy.cuda_offloading.entry");
-  DummyEntry->setSection("cuda_offloading_entries");
+      IsHIP ? "__dummy.hip_offloading.entry" : "__dummy.cuda_offloading.entry");
   DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
+  DummyEntry->setSection(IsHIP ? "hip_offloading_entries"
+                               : "cuda_offloading_entries");
 
   return FatbinDesc;
 }
@@ -358,7 +362,7 @@
 ///                         0, entry->size, 0, 0);
 ///   }
 /// }
-Function *createRegisterGlobalsFunction(Module &M) {
+Function *createRegisterGlobalsFunction(Module &M, bool IsHIP) {
   LLVMContext &C = M.getContext();
   // Get the __cudaRegisterFunction function declaration.
   auto *RegFuncTy = FunctionType::get(
@@ -368,8 +372,8 @@
        Type::getInt8PtrTy(C), Type::getInt8PtrTy(C), Type::getInt8PtrTy(C),
        Type::getInt8PtrTy(C), Type::getInt32PtrTy(C)},
       /*isVarArg*/ false);
-  FunctionCallee RegFunc =
-      M.getOrInsertFunction("__cudaRegisterFunction", RegFuncTy);
+  FunctionCallee RegFunc = M.getOrInsertFunction(
+      IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
 
   // Get the __cudaRegisterVar function declaration.
   auto *RegVarTy = FunctionType::get(
@@ -378,25 +382,31 @@
        Type::getInt8PtrTy(C), Type::getInt8PtrTy(C), Type::getInt32Ty(C),
        getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)},
       /*isVarArg*/ false);
-  FunctionCallee RegVar = M.getOrInsertFunction("__cudaRegisterVar", RegVarTy);
+  FunctionCallee RegVar = M.getOrInsertFunction(
+      IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
 
   // Create the references to the start / stop symbols defined by the linker.
-  auto *EntriesB = new GlobalVariable(
-      M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true,
-      GlobalValue::ExternalLinkage,
-      /*Initializer*/ nullptr, "__start_cuda_offloading_entries");
+  auto *EntriesB =
+      new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0),
+                         /*isConstant*/ true, GlobalValue::ExternalLinkage,
+                         /*Initializer*/ nullptr,
+                         IsHIP ? "__start_hip_offloading_entries"
+                               : "__start_cuda_offloading_entries");
   EntriesB->setVisibility(GlobalValue::HiddenVisibility);
-  auto *EntriesE = new GlobalVariable(
-      M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true,
-      GlobalValue::ExternalLinkage,
-      /*Initializer*/ nullptr, "__stop_cuda_offloading_entries");
+  auto *EntriesE =
+      new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0),
+                         /*isConstant*/ true, GlobalValue::ExternalLinkage,
+                         /*Initializer*/ nullptr,
+                         IsHIP ? "__stop_hip_offloading_entries"
+                               : "__stop_cuda_offloading_entries");
   EntriesE->setVisibility(GlobalValue::HiddenVisibility);
 
   auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C),
                                          Type::getInt8PtrTy(C)->getPointerTo(),
                                          /*isVarArg*/ false);
-  auto *RegGlobalsFn = Function::Create(
-      RegGlobalsTy, GlobalValue::InternalLinkage, ".cuda.globals_reg", &M);
+  auto *RegGlobalsFn =
+      Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
+                       IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
   RegGlobalsFn->setSection(".text.startup");
 
   // Create the loop to register all the entries.
@@ -502,24 +512,27 @@
 
 // Create the constructor and destructor to register the fatbinary with the CUDA
 // runtime.
-void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc) {
+void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
+                                  bool IsHIP) {
   LLVMContext &C = M.getContext();
   auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
-  auto *CtorFunc = Function::Create(CtorFuncTy, GlobalValue::InternalLinkage,
-                                    ".cuda.fatbin_reg", &M);
+  auto *CtorFunc =
+      Function::Create(CtorFuncTy, GlobalValue::InternalLinkage,
+                       IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg", &M);
   CtorFunc->setSection(".text.startup");
 
   auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
-  auto *DtorFunc = Function::Create(DtorFuncTy, GlobalValue::InternalLinkage,
-                                    ".cuda.fatbin_unreg", &M);
+  auto *DtorFunc =
+      Function::Create(DtorFuncTy, GlobalValue::InternalLinkage,
+                       IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg", &M);
   DtorFunc->setSection(".text.startup");
 
   // Get the __cudaRegisterFatBinary function declaration.
   auto *RegFatTy = FunctionType::get(Type::getInt8PtrTy(C)->getPointerTo(),
                                      Type::getInt8PtrTy(C),
                                      /*isVarArg*/ false);
-  FunctionCallee RegFatbin =
-      M.getOrInsertFunction("__cudaRegisterFatBinary", RegFatTy);
+  FunctionCallee RegFatbin = M.getOrInsertFunction(
+      IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
   // Get the __cudaRegisterFatBinaryEnd function declaration.
   auto *RegFatEndTy = FunctionType::get(Type::getVoidTy(C),
                                         Type::getInt8PtrTy(C)->getPointerTo(),
@@ -530,8 +543,9 @@
   auto *UnregFatTy = FunctionType::get(Type::getVoidTy(C),
                                        Type::getInt8PtrTy(C)->getPointerTo(),
                                        /*isVarArg*/ false);
-  FunctionCallee UnregFatbin =
-      M.getOrInsertFunction("__cudaUnregisterFatBinary", UnregFatTy);
+  FunctionCallee UnregFatbin = M.getOrInsertFunction(
+      IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
+      UnregFatTy);
 
   auto *AtExitTy =
       FunctionType::get(Type::getInt32Ty(C), DtorFuncTy->getPointerTo(),
@@ -542,7 +556,7 @@
       M, Type::getInt8PtrTy(C)->getPointerTo(), false,
       llvm::GlobalValue::InternalLinkage,
       llvm::ConstantPointerNull::get(Type::getInt8PtrTy(C)->getPointerTo()),
-      ".cuda.binary_handle");
+      IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle");
 
   // Create the constructor to register this image with the runtime.
   IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
@@ -552,8 +566,9 @@
   CtorBuilder.CreateAlignedStore(
       Handle, BinaryHandleGlobal,
       Align(M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C))));
-  CtorBuilder.CreateCall(createRegisterGlobalsFunction(M), Handle);
-  CtorBuilder.CreateCall(RegFatbinEnd, Handle);
+  CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP), Handle);
+  if (!IsHIP)
+    CtorBuilder.CreateCall(RegFatbinEnd, Handle);
   CtorBuilder.CreateCall(AtExit, DtorFunc);
   CtorBuilder.CreateRetVoid();
 
@@ -584,11 +599,21 @@
 }
 
 Error wrapCudaBinary(Module &M, ArrayRef<char> Image) {
-  GlobalVariable *Desc = createFatbinDesc(M, Image);
+  GlobalVariable *Desc = createFatbinDesc(M, Image, /* IsHIP */ false);
+  if (!Desc)
+    return createStringError(inconvertibleErrorCode(),
+                             "No fatinbary section created.");
+
+  createRegisterFatbinFunction(M, Desc, /* IsHIP */ false);
+  return Error::success();
+}
+
+Error wrapHIPBinary(Module &M, ArrayRef<char> Image) {
+  GlobalVariable *Desc = createFatbinDesc(M, Image, /* IsHIP */ true);
   if (!Desc)
     return createStringError(inconvertibleErrorCode(),
                              "No fatinbary section created.");
 
-  createRegisterFatbinFunction(M, Desc);
+  createRegisterFatbinFunction(M, Desc, /* IsHIP */ true);
   return Error::success();
 }
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -593,6 +593,49 @@
 
   return *TempFileOrErr;
 }
+
+Expected<StringRef>
+fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
+          Triple TheTriple) {
+  // AMDGPU uses the clang-offload-bundler to bundle the linked images.
+  Expected<std::string> OffloadBundlerPath = findProgram(
+      "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
+  if (!OffloadBundlerPath)
+    return OffloadBundlerPath.takeError();
+
+  // Create a new file to write the linked device image to.
+  auto TempFileOrErr =
+      createOutputFile(sys::path::filename(ExecutableName) + "-device-" +
+                           TheTriple.getArchName(),
+                       "hipfb");
+  if (!TempFileOrErr)
+    return TempFileOrErr.takeError();
+
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+
+  SmallVector<StringRef, 16> CmdArgs;
+  CmdArgs.push_back(*OffloadBundlerPath);
+  CmdArgs.push_back("-type=o");
+  CmdArgs.push_back("-bundle-align=4096");
+
+  SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"};
+  for (const auto &FileAndArch : InputFiles)
+    Targets.push_back(
+        Saver.save("hipv4-amdgcn-amd-amdhsa--" + std::get<1>(FileAndArch)));
+  CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
+
+  CmdArgs.push_back("-input=/dev/null");
+  for (const auto &FileAndArch : InputFiles)
+    CmdArgs.push_back(Saver.save("-input=" + std::get<0>(FileAndArch)));
+
+  CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr));
+
+  if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs))
+    return std::move(Err);
+
+  return *TempFileOrErr;
+}
 } // namespace amdgcn
 
 namespace generic {
@@ -1105,6 +1148,10 @@
     if (Error Err = wrapCudaBinary(M, BuffersToWrap.front()))
       return std::move(Err);
     break;
+  case OFK_HIP:
+    if (Error Err = wrapHIPBinary(M, BuffersToWrap.front()))
+      return std::move(Err);
+    break;
   default:
     return createStringError(inconvertibleErrorCode(),
                              getOffloadKindName(Kind) +
@@ -1153,6 +1200,29 @@
   return std::move(Buffers);
 }
 
+Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
+bundleHIP(ArrayRef<OffloadingImage> Images) {
+  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+
+  SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
+  for (const OffloadingImage &Image : Images)
+    InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
+                                           Image.StringData.lookup("arch")));
+
+  Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
+  auto FileOrErr = amdgcn::fatbinary(InputFiles, TheTriple);
+  if (!FileOrErr)
+    return FileOrErr.takeError();
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
+      llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
+  if (std::error_code EC = ImageOrError.getError())
+    return createFileError(*FileOrErr, EC);
+  Buffers.emplace_back(std::move(*ImageOrError));
+
+  return std::move(Buffers);
+}
+
 /// Transforms the input \p Images into the binary format the runtime expects
 /// for the given \p Kind.
 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
@@ -1162,6 +1232,8 @@
     return bundleOpenMP(Images);
   case OFK_Cuda:
     return bundleCuda(Images);
+  case OFK_HIP:
+    return bundleHIP(Images);
   default:
     return createStringError(inconvertibleErrorCode(),
                              getOffloadKindName(Kind) +
Index: clang/test/Driver/linker-wrapper.c
===================================================================
--- clang/test/Driver/linker-wrapper.c
+++ clang/test/Driver/linker-wrapper.c
@@ -81,6 +81,19 @@
 // CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o
 // CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_52,file={{.*}}.out --image=profile=sm_70,file={{.*}}.out
 
+// RUN: clang-offload-packager -o %t.out \
+// RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN:   -fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --dry-run --host-triple x86_64-unknown-linux-gnu -linker-path \
+// RUN:   /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
+
+// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o
+// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o
+// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a -input=/dev/null -input={{.*}}.out -input={{.*}}out -output={{.*}}.hipfb
+
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
Index: clang/test/Driver/linker-wrapper-image.c
===================================================================
--- clang/test/Driver/linker-wrapper-image.c
+++ clang/test/Driver/linker-wrapper-image.c
@@ -77,7 +77,6 @@
 // CUDA-NEXT:  %5 = icmp eq i64 %size, 0
 // CUDA-NEXT:  br i1 %5, label %if.then, label %if.else
 
-
 //      CUDA: if.then:
 // CUDA-NEXT:   %6 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
 // CUDA-NEXT:   br label %if.end
@@ -111,3 +110,84 @@
 //      CUDA: while.end:
 // CUDA-NEXT:   ret void
 // CUDA-NEXT: }
+
+// RUN: clang-offload-packager -o %t.out --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN:   -fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple x86_64-unknown-linux-gnu \
+// RUN:   -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
+
+//      HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-NEXT: @__dummy.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"
+// HIP-NEXT: @.hip.binary_handle = internal global ptr null
+// HIP-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
+// HIP-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
+// HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }]
+
+//      HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT:   %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+// HIP-NEXT:   store ptr %0, ptr @.hip.binary_handle, align 8
+// HIP-NEXT:   call void @.hip.globals_reg(ptr %0)
+// HIP-NEXT:   %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+// HIP-NEXT:   ret void
+// HIP-NEXT: }
+
+//      HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT:   %0 = load ptr, ptr @.hip.binary_handle, align 8
+// HIP-NEXT:   call void @__hipUnregisterFatBinary(ptr %0)
+// HIP-NEXT:   ret void
+// HIP-NEXT: }
+
+//      HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT:   br i1 icmp ne (ptr @__start_hip_offloading_entries, ptr @__stop_hip_offloading_entries), label %while.entry, label %while.end
+
+//      HIP: while.entry:
+// HIP-NEXT:   %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %7, %if.end ]
+// HIP-NEXT:   %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0
+// HIP-NEXT:   %addr = load ptr, ptr %1, align 8
+// HIP-NEXT:   %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1
+// HIP-NEXT:   %name = load ptr, ptr %2, align 8
+// HIP-NEXT:   %3 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 2
+// HIP-NEXT:   %size = load i64, ptr %3, align 4
+// HIP-NEXT:   %4 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 3
+// HIP-NEXT:   %flag = load i32, ptr %4, align 4
+// HIP-NEXT:   %5 = icmp eq i64 %size, 0
+// HIP-NEXT:   br i1 %5, label %if.then, label %if.else
+
+//      HIP: if.then:
+// HIP-NEXT:   %6 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
+// HIP-NEXT:   br label %if.end
+
+//      HIP: if.else:
+// HIP-NEXT:   switch i32 %flag, label %if.end [
+// HIP-NEXT:     i32 0, label %sw.global
+// HIP-NEXT:     i32 1, label %sw.managed
+// HIP-NEXT:     i32 2, label %sw.surface
+// HIP-NEXT:     i32 3, label %sw.texture
+// HIP-NEXT:   ]
+
+//      HIP: sw.global:
+// HIP-NEXT:   call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 0, i64 %size, i32 0, i32 0)
+// HIP-NEXT:   br label %if.end
+
+//      HIP: sw.managed:
+// HIP-NEXT:   br label %if.end
+
+//      HIP: sw.surface:
+// HIP-NEXT:   br label %if.end
+
+//      HIP: sw.texture:
+// HIP-NEXT:   br label %if.end
+
+//      HIP: if.end:
+// HIP-NEXT:   %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1
+// HIP-NEXT:   %8 = icmp eq ptr %7, @__stop_hip_offloading_entries
+// HIP-NEXT:   br i1 %8, label %while.end, label %while.entry
+
+//      HIP: while.end:
+// HIP-NEXT:   ret void
+// HIP-NEXT: }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to