[PATCH] D139287: [OpenMP] Introduce basic JIT support to OpenMP target offloading

Shilei Tian via Phabricator via cfe-commits Tue, 27 Dec 2022 16:07:58 -0800

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
tianshilei1992 marked an inline comment as done.
Closed by commit rG58906e4901ec: [OpenMP] Introduce basic JIT support to OpenMP 
target offloading (authored by tianshilei1992).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139287/new/

https://reviews.llvm.org/D139287

Files:
  openmp/libomptarget/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
  openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
  openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
  openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
  openmp/libomptarget/test/lit.cfg

Index: openmp/libomptarget/test/lit.cfg
===================================================================
--- openmp/libomptarget/test/lit.cfg
+++ openmp/libomptarget/test/lit.cfg
@@ -34,6 +34,15 @@
     else:
         config.environment[name] = value
 
+# Evalute the environment variable which is a string boolean value.
+def evaluate_bool_env(env):
+    env = env.lower()
+    possible_true_values = ["on", "true", "1"]
+    for v in possible_true_values:
+        if env == v:
+            return True
+    return False
+
 # name: The name of this test suite.
 config.name = 'libomptarget :: ' + config.libomptarget_current_target
 
@@ -111,10 +120,17 @@
         config.test_flags += " --libomptarget-nvptx-bc-path=" + config.library_dir
     if config.libomptarget_current_target.endswith('-LTO'):
         config.test_flags += " -foffload-lto"
+    if config.libomptarget_current_target.endswith('-JIT-LTO') and evaluate_bool_env(
+        config.environment['LIBOMPTARGET_NEXTGEN_PLUGINS']
+    ):
+        config.test_flags += " -foffload-lto"
+        config.test_flags += " -Wl,--embed-bitcode"
 
 def remove_suffix_if_present(name):
     if name.endswith('-LTO'):
         return name[:-4]
+    elif name.endswith('-JIT-LTO'):
+        return name[:-8]
     else:
         return name
 
Index: openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -364,6 +364,10 @@
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
     return true;
   }
+
+  Triple::ArchType getTripleArch() const override {
+    return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
+  }
 };
 
 GenericPluginTy *Plugin::createPlugin() { return new GenELF64PluginTy(); }
Index: openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -278,6 +278,14 @@
                                  GridValues.GV_Warp_Size))
       return Err;
 
+    if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
+                                 ComputeCapability.Major))
+      return Err;
+
+    if (auto Err = getDeviceAttr(CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
+                                 ComputeCapability.Minor))
+      return Err;
+
     return Plugin::success();
   }
 
@@ -794,6 +802,9 @@
     return Plugin::check(Res, "Error in cuDeviceGetAttribute: %s");
   }
 
+  /// See GenericDeviceTy::getArch().
+  std::string getArch() const override { return ComputeCapability.str(); }
+
 private:
   using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
   using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
@@ -810,6 +821,15 @@
 
   /// The CUDA device handler.
   CUdevice Device = CU_DEVICE_INVALID;
+
+  /// The compute capability of the corresponding CUDA device.
+  struct ComputeCapabilityTy {
+    uint32_t Major;
+    uint32_t Minor;
+    std::string str() const {
+      return "sm_" + std::to_string(Major * 10 + Minor);
+    }
+  } ComputeCapability;
 };
 
 Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
@@ -908,6 +928,11 @@
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
 
+  Triple::ArchType getTripleArch() const override {
+    // TODO: I think we can drop the support for 32-bit NVPTX devices.
+    return Triple::nvptx64;
+  }
+
   /// Check whether the image is compatible with the available CUDA devices.
   Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
     for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -26,6 +26,7 @@
 #include "omptarget.h"
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Allocator.h"
@@ -377,6 +378,17 @@
   }
   uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
 
+  /// Get target architecture.
+  virtual std::string getArch() const {
+    llvm_unreachable("device doesn't support JIT");
+  }
+
+  /// Post processing after jit backend. The ownership of \p MB will be taken.
+  virtual Expected<std::unique_ptr<MemoryBuffer>>
+  doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
+    return MB;
+  }
+
 private:
   /// Register offload entry for global variable.
   Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
@@ -526,6 +538,11 @@
   /// Get the ELF code to recognize the binary image of this plugin.
   virtual uint16_t getMagicElfBits() const = 0;
 
+  /// Get the target triple of this plugin.
+  virtual Triple::ArchType getTripleArch() const {
+    llvm_unreachable("target doesn't support jit");
+  }
+
   /// Allocate a structure using the internal allocator.
   template <typename Ty> Ty *allocate() {
     return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -11,6 +11,7 @@
 #include "PluginInterface.h"
 #include "Debug.h"
 #include "GlobalHandler.h"
+#include "JIT.h"
 #include "elf_common.h"
 #include "omptarget.h"
 #include "omptargetplugin.h"
@@ -629,7 +630,10 @@
   if (!Plugin::isActive())
     return false;
 
-  return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits());
+  if (elf_check_machine(TgtImage, Plugin::get().getMagicElfBits()))
+    return true;
+
+  return jit::checkBitcodeImage(TgtImage, Plugin::get().getTripleArch());
 }
 
 int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
@@ -700,7 +704,37 @@
 __tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                           __tgt_device_image *TgtImage) {
   GenericPluginTy &Plugin = Plugin::get();
-  auto TableOrErr = Plugin.getDevice(DeviceId).loadBinary(Plugin, TgtImage);
+  GenericDeviceTy &Device = Plugin.getDevice(DeviceId);
+
+  // If it is a bitcode image, we have to jit the binary image before loading to
+  // the device.
+  {
+    UInt32Envar JITOptLevel("LIBOMPTARGET_JIT_OPT_LEVEL", 3);
+    Triple::ArchType TA = Plugin.getTripleArch();
+    std::string Arch = Device.getArch();
+
+    jit::PostProcessingFn PostProcessing =
+        [&Device](std::unique_ptr<MemoryBuffer> MB)
+        -> Expected<std::unique_ptr<MemoryBuffer>> {
+      return Device.doJITPostProcessing(std::move(MB));
+    };
+
+    if (jit::checkBitcodeImage(TgtImage, TA)) {
+      auto TgtImageOrErr =
+          jit::compile(TgtImage, TA, Arch, JITOptLevel, PostProcessing);
+      if (!TgtImageOrErr) {
+        auto Err = TgtImageOrErr.takeError();
+        REPORT("Failure to jit binary image from bitcode image %p on device "
+               "%d: %s\n",
+               TgtImage, DeviceId, toString(std::move(Err)).data());
+        return nullptr;
+      }
+
+      TgtImage = *TgtImageOrErr;
+    }
+  }
+
+  auto TableOrErr = Device.loadBinary(Plugin, TgtImage);
   if (!TableOrErr) {
     auto Err = TableOrErr.takeError();
     REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
===================================================================
--- /dev/null
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.h
@@ -0,0 +1,50 @@
+//===- JIT.h - Target independent JIT infrastructure ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
+#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Error.h"
+
+#include <functional>
+#include <memory>
+#include <string>
+
+struct __tgt_device_image;
+
+namespace llvm {
+class MemoryBuffer;
+
+namespace omp {
+namespace jit {
+
+/// Function type for a callback that will be called after the backend is
+/// called.
+using PostProcessingFn = std::function<Expected<std::unique_ptr<MemoryBuffer>>(
+    std::unique_ptr<MemoryBuffer>)>;
+
+/// Check if \p Image contains bitcode with triple \p Triple.
+bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA);
+
+/// Compile the bitcode image \p Image and generate the binary image that can be
+/// loaded to the target device of the triple \p Triple architecture \p MCpu. \p
+/// PostProcessing will be called after codegen to handle cases such as assember
+/// as an external tool.
+Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
+                                       Triple::ArchType TA, std::string MCpu,
+                                       unsigned OptLevel,
+                                       PostProcessingFn PostProcessing);
+} // namespace jit
+} // namespace omp
+} // namespace llvm
+
+#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_JIT_H
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
===================================================================
--- /dev/null
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
@@ -0,0 +1,366 @@
+//===- JIT.cpp - Target independent JIT infrastructure --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "Debug.h"
+
+#include "omptarget.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <mutex>
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace omp;
+
+static codegen::RegisterCodeGenFlags RCGF;
+
+namespace {
+std::once_flag InitFlag;
+
+void init(Triple TT) {
+  if (TT.isNVPTX()) {
+    LLVMInitializeNVPTXTargetInfo();
+    LLVMInitializeNVPTXTarget();
+    LLVMInitializeNVPTXTargetMC();
+    LLVMInitializeNVPTXAsmPrinter();
+  } else if (TT.isAMDGPU()) {
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetMC();
+    LLVMInitializeAMDGPUAsmPrinter();
+  } else {
+    FAILURE_MESSAGE("unsupported JIT target");
+    abort();
+  }
+
+  // Initialize passes
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeCore(Registry);
+  initializeScalarOpts(Registry);
+  initializeVectorization(Registry);
+  initializeIPO(Registry);
+  initializeAnalysis(Registry);
+  initializeTransformUtils(Registry);
+  initializeInstCombine(Registry);
+  initializeTarget(Registry);
+
+  initializeExpandLargeDivRemLegacyPassPass(Registry);
+  initializeExpandLargeFpConvertLegacyPassPass(Registry);
+  initializeExpandMemCmpPassPass(Registry);
+  initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
+  initializeSelectOptimizePass(Registry);
+  initializeCodeGenPreparePass(Registry);
+  initializeAtomicExpandPass(Registry);
+  initializeRewriteSymbolsLegacyPassPass(Registry);
+  initializeWinEHPreparePass(Registry);
+  initializeDwarfEHPrepareLegacyPassPass(Registry);
+  initializeSafeStackLegacyPassPass(Registry);
+  initializeSjLjEHPreparePass(Registry);
+  initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
+  initializeGlobalMergePass(Registry);
+  initializeIndirectBrExpandPassPass(Registry);
+  initializeInterleavedLoadCombinePass(Registry);
+  initializeInterleavedAccessPass(Registry);
+  initializeUnreachableBlockElimLegacyPassPass(Registry);
+  initializeExpandReductionsPass(Registry);
+  initializeExpandVectorPredicationPass(Registry);
+  initializeWasmEHPreparePass(Registry);
+  initializeWriteBitcodePassPass(Registry);
+  initializeHardwareLoopsPass(Registry);
+  initializeTypePromotionPass(Registry);
+  initializeReplaceWithVeclibLegacyPass(Registry);
+  initializeJMCInstrumenterPass(Registry);
+}
+
+Expected<std::unique_ptr<Module>>
+createModuleFromImage(__tgt_device_image *Image, LLVMContext &Context) {
+  StringRef Data((const char *)Image->ImageStart,
+                 (char *)Image->ImageEnd - (char *)Image->ImageStart);
+  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
+      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
+  SMDiagnostic Err;
+  auto Mod = parseIR(*MB, Err, Context);
+  if (!Mod)
+    return make_error<StringError>("Failed to create module",
+                                   inconvertibleErrorCode());
+  return Mod;
+}
+
+CodeGenOpt::Level getCGOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return CodeGenOpt::None;
+  case 1:
+    return CodeGenOpt::Less;
+  case 2:
+    return CodeGenOpt::Default;
+  case 3:
+    return CodeGenOpt::Aggressive;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+OptimizationLevel getOptLevel(unsigned OptLevel) {
+  switch (OptLevel) {
+  case 0:
+    return OptimizationLevel::O0;
+  case 1:
+    return OptimizationLevel::O1;
+  case 2:
+    return OptimizationLevel::O2;
+  case 3:
+    return OptimizationLevel::O3;
+  }
+  llvm_unreachable("Invalid optimization level");
+}
+
+Expected<std::unique_ptr<TargetMachine>>
+createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
+  Triple TT(M.getTargetTriple());
+  CodeGenOpt::Level CGOptLevel = getCGOptLevel(OptLevel);
+
+  std::string Msg;
+  const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
+  if (!T)
+    return make_error<StringError>(Msg, inconvertibleErrorCode());
+
+  SubtargetFeatures Features;
+  Features.getDefaultSubtargetFeatures(TT);
+
+  std::optional<Reloc::Model> RelocModel;
+  if (M.getModuleFlag("PIC Level"))
+    RelocModel =
+        M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
+
+  std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
+
+  TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TT);
+
+  std::unique_ptr<TargetMachine> TM(
+      T->createTargetMachine(M.getTargetTriple(), CPU, Features.getString(),
+                             Options, RelocModel, CodeModel, CGOptLevel));
+  if (!TM)
+    return make_error<StringError>("Failed to create target machine",
+                                   inconvertibleErrorCode());
+  return TM;
+}
+
+///
+class JITEngine {
+public:
+  JITEngine(Triple::ArchType TA, std::string MCpu)
+      : TT(Triple::getArchTypeName(TA)), CPU(MCpu) {
+    std::call_once(InitFlag, init, TT);
+  }
+
+  /// Run jit compilation. It is expected to get a memory buffer containing the
+  /// generated device image that could be loaded to the device directly.
+  Expected<std::unique_ptr<MemoryBuffer>>
+  run(__tgt_device_image *Image, unsigned OptLevel,
+      jit::PostProcessingFn PostProcessing);
+
+private:
+  /// Run backend, which contains optimization and code generation.
+  Expected<std::unique_ptr<MemoryBuffer>> backend(Module &M, unsigned OptLevel);
+
+  /// Run optimization pipeline.
+  void opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+           unsigned OptLevel);
+
+  /// Run code generation.
+  void codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+               raw_pwrite_stream &OS);
+
+  LLVMContext Context;
+  const Triple TT;
+  const std::string CPU;
+};
+
+void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
+                    unsigned OptLevel) {
+  PipelineTuningOptions PTO;
+  std::optional<PGOOptions> PGOOpt;
+
+  LoopAnalysisManager LAM;
+  FunctionAnalysisManager FAM;
+  CGSCCAnalysisManager CGAM;
+  ModuleAnalysisManager MAM;
+  ModulePassManager MPM;
+
+  PassBuilder PB(TM, PTO, PGOOpt, nullptr);
+
+  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
+
+  // Register all the basic analyses with the managers.
+  PB.registerModuleAnalyses(MAM);
+  PB.registerCGSCCAnalyses(CGAM);
+  PB.registerFunctionAnalyses(FAM);
+  PB.registerLoopAnalyses(LAM);
+  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+  MPM.addPass(PB.buildPerModuleDefaultPipeline(getOptLevel(OptLevel)));
+
+  MPM.run(M, MAM);
+}
+
+void JITEngine::codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII,
+                        Module &M, raw_pwrite_stream &OS) {
+  legacy::PassManager PM;
+  PM.add(new TargetLibraryInfoWrapperPass(*TLII));
+  MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(
+      reinterpret_cast<LLVMTargetMachine *>(TM));
+  TM->addPassesToEmitFile(PM, OS, nullptr,
+                          TT.isNVPTX() ? CGFT_AssemblyFile : CGFT_ObjectFile,
+                          /* DisableVerify */ false, MMIWP);
+
+  PM.run(M);
+}
+
+Expected<std::unique_ptr<MemoryBuffer>> JITEngine::backend(Module &M,
+                                                           unsigned OptLevel) {
+
+  auto RemarksFileOrErr = setupLLVMOptimizationRemarks(
+      Context, /* RemarksFilename */ "", /* RemarksPasses */ "",
+      /* RemarksFormat */ "", /* RemarksWithHotness */ false);
+  if (Error E = RemarksFileOrErr.takeError())
+    return std::move(E);
+  if (*RemarksFileOrErr)
+    (*RemarksFileOrErr)->keep();
+
+  auto TMOrErr = createTargetMachine(M, CPU, OptLevel);
+  if (!TMOrErr)
+    return TMOrErr.takeError();
+
+  std::unique_ptr<TargetMachine> TM = std::move(*TMOrErr);
+  TargetLibraryInfoImpl TLII(TT);
+
+  opt(TM.get(), &TLII, M, OptLevel);
+
+  // Prepare the output buffer and stream for codegen.
+  SmallVector<char> CGOutputBuffer;
+  raw_svector_ostream OS(CGOutputBuffer);
+
+  codegen(TM.get(), &TLII, M, OS);
+
+  return MemoryBuffer::getMemBufferCopy(OS.str());
+}
+
+Expected<std::unique_ptr<MemoryBuffer>>
+JITEngine::run(__tgt_device_image *Image, unsigned OptLevel,
+               jit::PostProcessingFn PostProcessing) {
+  auto ModOrErr = createModuleFromImage(Image, Context);
+  if (!ModOrErr)
+    return ModOrErr.takeError();
+
+  auto Mod = std::move(*ModOrErr);
+
+  auto MBOrError = backend(*Mod, OptLevel);
+  if (!MBOrError)
+    return MBOrError.takeError();
+
+  return PostProcessing(std::move(*MBOrError));
+}
+
+/// A map from a bitcode image start address to its corresponding triple. If the
+/// image is not in the map, it is not a bitcode image.
+DenseMap<void *, Triple::ArchType> BitcodeImageMap;
+
+/// Output images generated from LLVM backend.
+SmallVector<std::unique_ptr<MemoryBuffer>, 4> JITImages;
+
+/// A list of __tgt_device_image images.
+std::list<__tgt_device_image> TgtImages;
+} // namespace
+
+namespace llvm {
+namespace omp {
+namespace jit {
+bool checkBitcodeImage(__tgt_device_image *Image, Triple::ArchType TA) {
+  TimeTraceScope TimeScope("Check bitcode image");
+
+  {
+    auto Itr = BitcodeImageMap.find(Image->ImageStart);
+    if (Itr != BitcodeImageMap.end() && Itr->second == TA)
+      return true;
+  }
+
+  StringRef Data(reinterpret_cast<const char *>(Image->ImageStart),
+                 reinterpret_cast<char *>(Image->ImageEnd) -
+                     reinterpret_cast<char *>(Image->ImageStart));
+  std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
+      Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
+  if (!MB)
+    return false;
+
+  Expected<object::IRSymtabFile> FOrErr = object::readIRSymtab(*MB);
+  if (!FOrErr) {
+    consumeError(FOrErr.takeError());
+    return false;
+  }
+
+  auto ActualTriple = FOrErr->TheReader.getTargetTriple();
+
+  if (Triple(ActualTriple).getArch() == TA) {
+    BitcodeImageMap[Image->ImageStart] = TA;
+    return true;
+  }
+
+  return false;
+}
+
+Expected<__tgt_device_image *> compile(__tgt_device_image *Image,
+                                       Triple::ArchType TA, std::string MCPU,
+                                       unsigned OptLevel,
+                                       PostProcessingFn PostProcessing) {
+  JITEngine J(TA, MCPU);
+
+  auto ImageMBOrErr = J.run(Image, OptLevel, PostProcessing);
+  if (!ImageMBOrErr)
+    return ImageMBOrErr.takeError();
+
+  JITImages.push_back(std::move(*ImageMBOrErr));
+  TgtImages.push_back(*Image);
+
+  auto &ImageMB = JITImages.back();
+  auto *NewImage = &TgtImages.back();
+
+  NewImage->ImageStart = (void *)ImageMB->getBufferStart();
+  NewImage->ImageEnd = (void *)ImageMB->getBufferEnd();
+
+  return NewImage;
+}
+
+} // namespace jit
+} // namespace omp
+} // namespace llvm
Index: openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
===================================================================
--- openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
+++ openmp/libomptarget/plugins-nextgen/common/PluginInterface/CMakeLists.txt
@@ -12,7 +12,8 @@
 
 # NOTE: Don't try to build `PluginInterface` using `add_llvm_library` because we
 # don't want to export `PluginInterface` while `add_llvm_library` requires that.
-add_library(PluginInterface OBJECT PluginInterface.cpp GlobalHandler.cpp)
+add_library(PluginInterface OBJECT
+  PluginInterface.cpp GlobalHandler.cpp JIT.cpp)
 
 # This is required when using LLVM libraries.
 llvm_update_compile_flags(PluginInterface)
@@ -20,7 +21,31 @@
 if (LLVM_LINK_LLVM_DYLIB)
   set(llvm_libs LLVM)
 else()
-  llvm_map_components_to_libnames(llvm_libs Support)
+  llvm_map_components_to_libnames(llvm_libs
+    ${LLVM_TARGETS_TO_BUILD}
+    AggressiveInstCombine
+    Analysis
+    BinaryFormat
+    BitReader
+    BitWriter
+    CodeGen
+    Core
+    Extensions
+    InstCombine
+    Instrumentation
+    IPO
+    IRReader
+    Linker
+    MC
+    Object
+    Passes
+    Remarks
+    ScalarOpts
+    Support
+    Target
+    TransformUtils
+    Vectorize
+  )
 endif()
 
 target_link_libraries(PluginInterface
Index: openmp/libomptarget/plugins-nextgen/CMakeLists.txt
===================================================================
--- openmp/libomptarget/plugins-nextgen/CMakeLists.txt
+++ openmp/libomptarget/plugins-nextgen/CMakeLists.txt
@@ -33,6 +33,9 @@
     # Define macro with the ELF ID for this target.
     add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
 
+    # Define target regiple
+    add_definitions("-DLIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE=${tmachine}")
+
     add_llvm_library("omptarget.rtl.${tmachine_libname}.nextgen"
       SHARED
 
Index: openmp/libomptarget/CMakeLists.txt
===================================================================
--- openmp/libomptarget/CMakeLists.txt
+++ openmp/libomptarget/CMakeLists.txt
@@ -50,6 +50,7 @@
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} x86_64-pc-linux-gnu-LTO")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda")
 set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-LTO")
+set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda-JIT-LTO")
 
 # Once the plugins for the different targets are validated, they will be added to
 # the list of supported targets in the current system.
@@ -73,7 +74,7 @@
 # Follow host OMPT support and check if host support has been requested.
 # LIBOMP_HAVE_OMPT_SUPPORT indicates whether host OMPT support has been implemented.
 # LIBOMP_OMPT_SUPPORT indicates whether host OMPT support has been requested (default is ON).
-# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON). 
+# LIBOMPTARGET_OMPT_SUPPORT indicates whether target OMPT support has been requested (default is ON).
 set(OMPT_TARGET_DEFAULT FALSE)
 if ((LIBOMP_HAVE_OMPT_SUPPORT) AND (LIBOMP_OMPT_SUPPORT) AND (NOT WIN32))
   set (OMPT_TARGET_DEFAULT TRUE)

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D139287: [OpenMP] Introduce basic JIT support to OpenMP target offloading

Reply via email to