jhuber6 updated this revision to Diff 442356.
jhuber6 added a comment.

Addressing some comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128914/new/

https://reviews.llvm.org/D128914

Files:
  clang/test/Driver/linker-wrapper-image.c
  clang/test/Driver/linker-wrapper.c
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h

Index: clang/tools/clang-linker-wrapper/OffloadWrapper.h
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.h
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.h
@@ -21,4 +21,8 @@
 /// registers the images with the CUDA runtime.
 llvm::Error wrapCudaBinary(llvm::Module &M, llvm::ArrayRef<char> Images);
 
+/// Wraps the input bundled image into the module \p M as global symbols and
+/// registers the images with the HIP runtime.
+llvm::Error wrapHIPBinary(llvm::Module &M, llvm::ArrayRef<char> Images);
+
 #endif
Index: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
@@ -22,6 +22,7 @@
 namespace {
 /// Magic number that begins the section containing the CUDA fatbinary.
 constexpr unsigned CudaFatMagic = 0x466243b1;
+constexpr unsigned HIPFatMagic = 0x48495046;
 
 /// Copied from clang/CGCudaRuntime.h.
 enum OffloadEntryKindFlag : uint32_t {
@@ -288,14 +289,15 @@
 
 /// Embed the image \p Image into the module \p M so it can be found by the
 /// runtime.
-GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image) {
+GlobalVariable *createFatbinDesc(Module &M, ArrayRef<char> Image, bool IsHIP) {
   LLVMContext &C = M.getContext();
   llvm::Type *Int8PtrTy = Type::getInt8PtrTy(C);
   llvm::Triple Triple = llvm::Triple(M.getTargetTriple());
 
   // Create the global string containing the fatbinary.
   StringRef FatbinConstantSection =
-      Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
+      IsHIP ? ".hip_fatbin"
+            : (Triple.isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin");
   auto *Data = ConstantDataArray::get(C, Image);
   auto *Fatbin = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
                                     GlobalVariable::InternalLinkage, Data,
@@ -303,10 +305,11 @@
   Fatbin->setSection(FatbinConstantSection);
 
   // Create the fatbinary wrapper
-  StringRef FatbinWrapperSection =
-      Triple.isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
+  StringRef FatbinWrapperSection = IsHIP               ? ".hipFatBinSegment"
+                                   : Triple.isMacOSX() ? "__NV_CUDA,__fatbin"
+                                                       : ".nvFatBinSegment";
   Constant *FatbinWrapper[] = {
-      ConstantInt::get(Type::getInt32Ty(C), CudaFatMagic),
+      ConstantInt::get(Type::getInt32Ty(C), IsHIP ? HIPFatMagic : CudaFatMagic),
       ConstantInt::get(Type::getInt32Ty(C), 1),
       ConstantExpr::getPointerBitCastOrAddrSpaceCast(Fatbin, Int8PtrTy),
       ConstantPointerNull::get(Type::getInt8PtrTy(C))};
@@ -328,9 +331,10 @@
       ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u));
   auto *DummyEntry = new GlobalVariable(
       M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage, DummyInit,
-      "__dummy.cuda_offloading.entry");
-  DummyEntry->setSection("cuda_offloading_entries");
+      IsHIP ? "__dummy.hip_offloading.entry" : "__dummy.cuda_offloading.entry");
   DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
+  DummyEntry->setSection(IsHIP ? "hip_offloading_entries"
+                               : "cuda_offloading_entries");
 
   return FatbinDesc;
 }
@@ -358,7 +362,7 @@
 ///                         0, entry->size, 0, 0);
 ///   }
 /// }
-Function *createRegisterGlobalsFunction(Module &M) {
+Function *createRegisterGlobalsFunction(Module &M, bool IsHIP) {
   LLVMContext &C = M.getContext();
   // Get the __cudaRegisterFunction function declaration.
   auto *RegFuncTy = FunctionType::get(
@@ -368,8 +372,8 @@
        Type::getInt8PtrTy(C), Type::getInt8PtrTy(C), Type::getInt8PtrTy(C),
        Type::getInt8PtrTy(C), Type::getInt32PtrTy(C)},
       /*isVarArg*/ false);
-  FunctionCallee RegFunc =
-      M.getOrInsertFunction("__cudaRegisterFunction", RegFuncTy);
+  FunctionCallee RegFunc = M.getOrInsertFunction(
+      IsHIP ? "__hipRegisterFunction" : "__cudaRegisterFunction", RegFuncTy);
 
   // Get the __cudaRegisterVar function declaration.
   auto *RegVarTy = FunctionType::get(
@@ -378,25 +382,31 @@
        Type::getInt8PtrTy(C), Type::getInt8PtrTy(C), Type::getInt32Ty(C),
        getSizeTTy(M), Type::getInt32Ty(C), Type::getInt32Ty(C)},
       /*isVarArg*/ false);
-  FunctionCallee RegVar = M.getOrInsertFunction("__cudaRegisterVar", RegVarTy);
+  FunctionCallee RegVar = M.getOrInsertFunction(
+      IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
 
   // Create the references to the start / stop symbols defined by the linker.
-  auto *EntriesB = new GlobalVariable(
-      M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true,
-      GlobalValue::ExternalLinkage,
-      /*Initializer*/ nullptr, "__start_cuda_offloading_entries");
+  auto *EntriesB =
+      new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0),
+                         /*isConstant*/ true, GlobalValue::ExternalLinkage,
+                         /*Initializer*/ nullptr,
+                         IsHIP ? "__start_hip_offloading_entries"
+                               : "__start_cuda_offloading_entries");
   EntriesB->setVisibility(GlobalValue::HiddenVisibility);
-  auto *EntriesE = new GlobalVariable(
-      M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true,
-      GlobalValue::ExternalLinkage,
-      /*Initializer*/ nullptr, "__stop_cuda_offloading_entries");
+  auto *EntriesE =
+      new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0),
+                         /*isConstant*/ true, GlobalValue::ExternalLinkage,
+                         /*Initializer*/ nullptr,
+                         IsHIP ? "__stop_hip_offloading_entries"
+                               : "__stop_cuda_offloading_entries");
   EntriesE->setVisibility(GlobalValue::HiddenVisibility);
 
   auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C),
                                          Type::getInt8PtrTy(C)->getPointerTo(),
                                          /*isVarArg*/ false);
-  auto *RegGlobalsFn = Function::Create(
-      RegGlobalsTy, GlobalValue::InternalLinkage, ".cuda.globals_reg", &M);
+  auto *RegGlobalsFn =
+      Function::Create(RegGlobalsTy, GlobalValue::InternalLinkage,
+                       IsHIP ? ".hip.globals_reg" : ".cuda.globals_reg", &M);
   RegGlobalsFn->setSection(".text.startup");
 
   // Create the loop to register all the entries.
@@ -502,24 +512,27 @@
 
 // Create the constructor and destructor to register the fatbinary with the CUDA
 // runtime.
-void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc) {
+void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
+                                  bool IsHIP) {
   LLVMContext &C = M.getContext();
   auto *CtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
-  auto *CtorFunc = Function::Create(CtorFuncTy, GlobalValue::InternalLinkage,
-                                    ".cuda.fatbin_reg", &M);
+  auto *CtorFunc =
+      Function::Create(CtorFuncTy, GlobalValue::InternalLinkage,
+                       IsHIP ? ".hip.fatbin_reg" : ".cuda.fatbin_reg", &M);
   CtorFunc->setSection(".text.startup");
 
   auto *DtorFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
-  auto *DtorFunc = Function::Create(DtorFuncTy, GlobalValue::InternalLinkage,
-                                    ".cuda.fatbin_unreg", &M);
+  auto *DtorFunc =
+      Function::Create(DtorFuncTy, GlobalValue::InternalLinkage,
+                       IsHIP ? ".hip.fatbin_unreg" : ".cuda.fatbin_unreg", &M);
   DtorFunc->setSection(".text.startup");
 
   // Get the __cudaRegisterFatBinary function declaration.
   auto *RegFatTy = FunctionType::get(Type::getInt8PtrTy(C)->getPointerTo(),
                                      Type::getInt8PtrTy(C),
                                      /*isVarArg*/ false);
-  FunctionCallee RegFatbin =
-      M.getOrInsertFunction("__cudaRegisterFatBinary", RegFatTy);
+  FunctionCallee RegFatbin = M.getOrInsertFunction(
+      IsHIP ? "__hipRegisterFatBinary" : "__cudaRegisterFatBinary", RegFatTy);
   // Get the __cudaRegisterFatBinaryEnd function declaration.
   auto *RegFatEndTy = FunctionType::get(Type::getVoidTy(C),
                                         Type::getInt8PtrTy(C)->getPointerTo(),
@@ -530,8 +543,9 @@
   auto *UnregFatTy = FunctionType::get(Type::getVoidTy(C),
                                        Type::getInt8PtrTy(C)->getPointerTo(),
                                        /*isVarArg*/ false);
-  FunctionCallee UnregFatbin =
-      M.getOrInsertFunction("__cudaUnregisterFatBinary", UnregFatTy);
+  FunctionCallee UnregFatbin = M.getOrInsertFunction(
+      IsHIP ? "__hipUnregisterFatBinary" : "__cudaUnregisterFatBinary",
+      UnregFatTy);
 
   auto *AtExitTy =
       FunctionType::get(Type::getInt32Ty(C), DtorFuncTy->getPointerTo(),
@@ -542,7 +556,7 @@
       M, Type::getInt8PtrTy(C)->getPointerTo(), false,
       llvm::GlobalValue::InternalLinkage,
       llvm::ConstantPointerNull::get(Type::getInt8PtrTy(C)->getPointerTo()),
-      ".cuda.binary_handle");
+      IsHIP ? ".hip.binary_handle" : ".cuda.binary_handle");
 
   // Create the constructor to register this image with the runtime.
   IRBuilder<> CtorBuilder(BasicBlock::Create(C, "entry", CtorFunc));
@@ -552,8 +566,9 @@
   CtorBuilder.CreateAlignedStore(
       Handle, BinaryHandleGlobal,
       Align(M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C))));
-  CtorBuilder.CreateCall(createRegisterGlobalsFunction(M), Handle);
-  CtorBuilder.CreateCall(RegFatbinEnd, Handle);
+  CtorBuilder.CreateCall(createRegisterGlobalsFunction(M, IsHIP), Handle);
+  if (!IsHIP)
+    CtorBuilder.CreateCall(RegFatbinEnd, Handle);
   CtorBuilder.CreateCall(AtExit, DtorFunc);
   CtorBuilder.CreateRetVoid();
 
@@ -584,11 +599,21 @@
 }
 
 Error wrapCudaBinary(Module &M, ArrayRef<char> Image) {
-  GlobalVariable *Desc = createFatbinDesc(M, Image);
+  GlobalVariable *Desc = createFatbinDesc(M, Image, /* IsHIP */ false);
+  if (!Desc)
+    return createStringError(inconvertibleErrorCode(),
+                             "No fatinbary section created.");
+
+  createRegisterFatbinFunction(M, Desc, /* IsHIP */ false);
+  return Error::success();
+}
+
+Error wrapHIPBinary(Module &M, ArrayRef<char> Image) {
+  GlobalVariable *Desc = createFatbinDesc(M, Image, /* IsHIP */ true);
   if (!Desc)
     return createStringError(inconvertibleErrorCode(),
                              "No fatinbary section created.");
 
-  createRegisterFatbinFunction(M, Desc);
+  createRegisterFatbinFunction(M, Desc, /* IsHIP */ true);
   return Error::success();
 }
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -596,6 +596,49 @@
 
   return *TempFileOrErr;
 }
+
+Expected<StringRef>
+fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
+          Triple TheTriple) {
+  // AMDGPU uses the clang-offload-bundler to bundle the linked images.
+  Expected<std::string> OffloadBundlerPath = findProgram(
+      "clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
+  if (!OffloadBundlerPath)
+    return OffloadBundlerPath.takeError();
+
+  // Create a new file to write the linked device image to.
+  auto TempFileOrErr =
+      createOutputFile(sys::path::filename(ExecutableName) + "-device-" +
+                           TheTriple.getArchName(),
+                       "hipfb");
+  if (!TempFileOrErr)
+    return TempFileOrErr.takeError();
+
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+
+  SmallVector<StringRef, 16> CmdArgs;
+  CmdArgs.push_back(*OffloadBundlerPath);
+  CmdArgs.push_back("-type=o");
+  CmdArgs.push_back("-bundle-align=4096");
+
+  SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"};
+  for (const auto &FileAndArch : InputFiles)
+    Targets.push_back(
+        Saver.save("hipv4-amdgcn-amd-amdhsa--" + std::get<1>(FileAndArch)));
+  CmdArgs.push_back(Saver.save(llvm::join(Targets, ",")));
+
+  CmdArgs.push_back("-input=/dev/null");
+  for (const auto &FileAndArch : InputFiles)
+    CmdArgs.push_back(Saver.save("-input=" + std::get<0>(FileAndArch)));
+
+  CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr));
+
+  if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs))
+    return std::move(Err);
+
+  return *TempFileOrErr;
+}
 } // namespace amdgcn
 
 namespace generic {
@@ -1108,6 +1151,10 @@
     if (Error Err = wrapCudaBinary(M, BuffersToWrap.front()))
       return std::move(Err);
     break;
+  case OFK_HIP:
+    if (Error Err = wrapHIPBinary(M, BuffersToWrap.front()))
+      return std::move(Err);
+    break;
   default:
     return createStringError(inconvertibleErrorCode(),
                              getOffloadKindName(Kind) +
@@ -1135,7 +1182,6 @@
 
 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
 bundleCuda(ArrayRef<OffloadingImage> Images) {
-  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
 
   SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
   for (const OffloadingImage &Image : Images)
@@ -1149,6 +1195,31 @@
 
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
       llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
+
+  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+  if (std::error_code EC = ImageOrError.getError())
+    return createFileError(*FileOrErr, EC);
+  Buffers.emplace_back(std::move(*ImageOrError));
+
+  return std::move(Buffers);
+}
+
+Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
+bundleHIP(ArrayRef<OffloadingImage> Images) {
+  SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
+  for (const OffloadingImage &Image : Images)
+    InputFiles.emplace_back(std::make_pair(Image.Image->getBufferIdentifier(),
+                                           Image.StringData.lookup("arch")));
+
+  Triple TheTriple = Triple(Images.front().StringData.lookup("triple"));
+  auto FileOrErr = amdgcn::fatbinary(InputFiles, TheTriple);
+  if (!FileOrErr)
+    return FileOrErr.takeError();
+
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
+      llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
+
+  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
   if (std::error_code EC = ImageOrError.getError())
     return createFileError(*FileOrErr, EC);
   Buffers.emplace_back(std::move(*ImageOrError));
@@ -1165,6 +1236,8 @@
     return bundleOpenMP(Images);
   case OFK_Cuda:
     return bundleCuda(Images);
+  case OFK_HIP:
+    return bundleHIP(Images);
   default:
     return createStringError(inconvertibleErrorCode(),
                              getOffloadKindName(Kind) +
Index: clang/test/Driver/linker-wrapper.c
===================================================================
--- clang/test/Driver/linker-wrapper.c
+++ clang/test/Driver/linker-wrapper.c
@@ -81,6 +81,19 @@
 // CUDA: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o {{.*}}.o
 // CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_52,file={{.*}}.out --image=profile=sm_70,file={{.*}}.out
 
+// RUN: clang-offload-packager -o %t.out \
+// RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN:   -fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --dry-run --host-triple x86_64-unknown-linux-gnu -linker-path \
+// RUN:   /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
+
+// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o
+// HIP: lld{{.*}}-flavor gnu --no-undefined -shared -o {{.*}}.out {{.*}}.o
+// HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx908,hipv4-amdgcn-amd-amdhsa--gfx90a -input=/dev/null -input={{.*}}.out -input={{.*}}out -output={{.*}}.hipfb
+
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx908 \
 // RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -93,6 +106,7 @@
 // LINKER_ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o a
 // LINKER_ARGS: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o a b
 
+/// Ensure that temp files aren't leftover from static libraries.
 // RUN: clang-offload-packager -o %t-lib.out \
 // RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
 // RUN:   --image=file=%S/Inputs/dummy-elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52
Index: clang/test/Driver/linker-wrapper-image.c
===================================================================
--- clang/test/Driver/linker-wrapper-image.c
+++ clang/test/Driver/linker-wrapper-image.c
@@ -77,7 +77,6 @@
 // CUDA-NEXT:  %5 = icmp eq i64 %size, 0
 // CUDA-NEXT:  br i1 %5, label %if.then, label %if.else
 
-
 //      CUDA: if.then:
 // CUDA-NEXT:   %6 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
 // CUDA-NEXT:   br label %if.end
@@ -111,3 +110,84 @@
 //      CUDA: while.end:
 // CUDA-NEXT:   ret void
 // CUDA-NEXT: }
+
+// RUN: clang-offload-packager -o %t.out --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
+// RUN:   -fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple x86_64-unknown-linux-gnu \
+// RUN:   -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
+
+//      HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
+// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
+// HIP-NEXT: @__dummy.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"
+// HIP-NEXT: @.hip.binary_handle = internal global ptr null
+// HIP-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
+// HIP-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
+// HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }]
+
+//      HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT:   %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
+// HIP-NEXT:   store ptr %0, ptr @.hip.binary_handle, align 8
+// HIP-NEXT:   call void @.hip.globals_reg(ptr %0)
+// HIP-NEXT:   %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
+// HIP-NEXT:   ret void
+// HIP-NEXT: }
+
+//      HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT:   %0 = load ptr, ptr @.hip.binary_handle, align 8
+// HIP-NEXT:   call void @__hipUnregisterFatBinary(ptr %0)
+// HIP-NEXT:   ret void
+// HIP-NEXT: }
+
+//      HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
+// HIP-NEXT: entry:
+// HIP-NEXT:   br i1 icmp ne (ptr @__start_hip_offloading_entries, ptr @__stop_hip_offloading_entries), label %while.entry, label %while.end
+
+//      HIP: while.entry:
+// HIP-NEXT:   %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %7, %if.end ]
+// HIP-NEXT:   %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0
+// HIP-NEXT:   %addr = load ptr, ptr %1, align 8
+// HIP-NEXT:   %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1
+// HIP-NEXT:   %name = load ptr, ptr %2, align 8
+// HIP-NEXT:   %3 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 2
+// HIP-NEXT:   %size = load i64, ptr %3, align 4
+// HIP-NEXT:   %4 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 3
+// HIP-NEXT:   %flag = load i32, ptr %4, align 4
+// HIP-NEXT:   %5 = icmp eq i64 %size, 0
+// HIP-NEXT:   br i1 %5, label %if.then, label %if.else
+
+//      HIP: if.then:
+// HIP-NEXT:   %6 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
+// HIP-NEXT:   br label %if.end
+
+//      HIP: if.else:
+// HIP-NEXT:   switch i32 %flag, label %if.end [
+// HIP-NEXT:     i32 0, label %sw.global
+// HIP-NEXT:     i32 1, label %sw.managed
+// HIP-NEXT:     i32 2, label %sw.surface
+// HIP-NEXT:     i32 3, label %sw.texture
+// HIP-NEXT:   ]
+
+//      HIP: sw.global:
+// HIP-NEXT:   call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 0, i64 %size, i32 0, i32 0)
+// HIP-NEXT:   br label %if.end
+
+//      HIP: sw.managed:
+// HIP-NEXT:   br label %if.end
+
+//      HIP: sw.surface:
+// HIP-NEXT:   br label %if.end
+
+//      HIP: sw.texture:
+// HIP-NEXT:   br label %if.end
+
+//      HIP: if.end:
+// HIP-NEXT:   %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1
+// HIP-NEXT:   %8 = icmp eq ptr %7, @__stop_hip_offloading_entries
+// HIP-NEXT:   br i1 %8, label %while.end, label %while.entry
+
+//      HIP: while.end:
+// HIP-NEXT:   ret void
+// HIP-NEXT: }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to