saiislam updated this revision to Diff 431975.
saiislam added a comment.

Changed the embedding scheme to add an ImageInfo field to __tgt_device_image.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124525/new/

https://reviews.llvm.org/D124525

Files:
  clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
  clang/tools/clang-linker-wrapper/OffloadWrapper.h
  openmp/libomptarget/include/omptarget.h
  openmp/libomptarget/src/rtl.cpp

Index: openmp/libomptarget/src/rtl.cpp
===================================================================
--- openmp/libomptarget/src/rtl.cpp
+++ openmp/libomptarget/src/rtl.cpp
@@ -13,6 +13,7 @@
 #include "rtl.h"
 #include "device.h"
 #include "private.h"
+//#include "llvm/OffloadArch/OffloadArch.h"
 
 #include <cassert>
 #include <cstdlib>
@@ -20,6 +21,8 @@
 #include <dlfcn.h>
 #include <mutex>
 #include <string>
+// It's strange we do not have llvm tools for openmp runtime, so we use stat
+#include <sys/stat.h>
 
 // List of all plugins that can support offloading.
 static const char *RTLNames[] = {
@@ -351,18 +354,108 @@
     initRTLonce(R);
 }
 
+/// Query runtime capabilities of this system by calling offload-arch -c
+/// offload_arch_output_buffer is persistent storage returned by this
+/// __tgt_get_active_offload_env.
+static void
+__tgt_get_active_offload_env(__tgt_active_offload_env *active_env,
+                             char *offload_arch_output_buffer,
+                             size_t offload_arch_output_buffer_size) {
+
+  // If OFFLOAD_ARCH_OVERRIDE env variable is present then use its value instead
+  // of querying it using LLVMOffloadArch library.
+  if (char *OffloadArchEnvVar = getenv("OFFLOAD_ARCH_OVERRIDE")) {
+    if (OffloadArchEnvVar) {
+      active_env->capabilities = OffloadArchEnvVar;
+      return;
+    }
+  }
+  // Get runtime capabilities of this system with libLLVMOffloadArch.a
+  // if (int rc = getRuntimeCapabilities(offload_arch_output_buffer,
+  //                                     offload_arch_output_buffer_size))
+  //   return;
+  // active_env->capabilities = offload_arch_output_buffer;
+  // return;
+}
+
+/// Split \p input on non-empty \p sep, dropping empty tokens (non-null args).
+std::vector<std::string> _splitstrings(char *input, const char *sep) {
+  std::vector<std::string> split_strings;
+  std::string s(input);
+  const std::string delimiter(sep);
+  for (size_t pos; (pos = s.find(delimiter)) != std::string::npos;) {
+    if (pos != 0)
+      split_strings.push_back(s.substr(0, pos));
+    s.erase(0, pos + delimiter.length());
+  }
+  if (!s.empty()) // a one-character trailing token is still a token
+    split_strings.push_back(s);
+  return split_strings;
+}
+
+static bool _ImageIsCompatibleWithEnv(__tgt_image_info *image_info,
+                                      __tgt_active_offload_env *active_env) {
+  // get_image_info will return null if no image information was registered.
+  // If no image information, assume application built with old compiler and
+  // check each image.
+  if (!image_info)
+    return true;
+
+  if (!active_env->capabilities)
+    return false;
+
+  // Each runtime requirement for the compiled image is stored in
+  // the image_info->offload_arch (TargetID) string.
+  // Each runtime capability obtained from "offload-arch -c" is stored in
+  // active_env->capabilities (TargetID) string.
+  // If every requirement has a matching capability, then the image
+  // is compatible with active environment
+
+  std::vector<std::string> reqs = _splitstrings(image_info->offload_arch, ":");
+  std::vector<std::string> caps = _splitstrings(active_env->capabilities, ":");
+
+  bool is_compatible = true;
+  for (auto req : reqs) {
+    bool missing_capability = true;
+    for (auto capability : caps)
+      if (capability == req)
+        missing_capability = false;
+    if (missing_capability) {
+      DP("Image requires %s but runtime capability %s is missing.\n",
+         image_info->offload_arch, req.c_str());
+      is_compatible = false;
+    }
+  }
+  return is_compatible;
+}
+
+#define MAX_CAPS_STR_SIZE 1024
 void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
+
+  // Get the current active offload environment
+  __tgt_active_offload_env offload_env = {nullptr};
+  // Need a buffer to hold results of offload-arch -c command
+  size_t offload_arch_output_buffer_size = MAX_CAPS_STR_SIZE;
+  std::vector<char> offload_arch_output_buffer;
+  offload_arch_output_buffer.resize(offload_arch_output_buffer_size);
+  __tgt_get_active_offload_env(&offload_env, offload_arch_output_buffer.data(),
+                               offload_arch_output_buffer_size);
+
+  RTLInfoTy *FoundRTL = NULL;
   PM->RTLsMtx.lock();
   // Register the images with the RTLs that understand them, if any.
   for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
     // Obtain the image.
     __tgt_device_image *img = &desc->DeviceImages[i];
 
-    RTLInfoTy *FoundRTL = nullptr;
-
+    // Get corresponding image info offload_arch and check with runtime
+    if (!_ImageIsCompatibleWithEnv(img->ImageInfo, &offload_env))
+      continue;
+    FoundRTL = NULL;
     // Scan the RTLs that have associated images until we find one that supports
     // the current image.
     for (auto &R : AllRTLs) {
+
       if (!R.is_valid_binary(img)) {
         DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
            DPxPTR(img->ImageStart), R.RTLName.c_str());
@@ -407,6 +500,39 @@
   }
   PM->RTLsMtx.unlock();
 
+  if (!FoundRTL) {
+    if (PM->TargetOffloadPolicy == tgt_mandatory)
+      fprintf(stderr, "ERROR:\
+	Runtime capabilities do NOT meet any offload image offload_arch\n\
+	and the OMP_TARGET_OFFLOAD policy is mandatory.  Terminating!\n\
+	Runtime capabilities : %s\n",
+              offload_env.capabilities);
+    else if (PM->TargetOffloadPolicy == tgt_disabled)
+      fprintf(stderr, "WARNING: Offloading is disabled.\n");
+    else
+      fprintf(
+          stderr,
+          "WARNING: Runtime capabilities do NOT meet any image offload_arch.\n\
+	 So device offloading is now disabled.\n\
+	Runtime capabilities : %s\n",
+          offload_env.capabilities);
+    if (PM->TargetOffloadPolicy != tgt_disabled) {
+      for (int32_t i = 0; i < desc->NumDeviceImages; ++i) {
+        __tgt_image_info *image_info = desc->DeviceImages[i].ImageInfo;
+        if (image_info)
+          fprintf(stderr, "\
+	  Image %d offload_arch : %s\n",
+                  i, image_info->offload_arch);
+        else
+          fprintf(stderr, "\
+	  Image %d has no offload_arch. Could be from older compiler\n",
+                  i);
+      }
+    }
+    if (PM->TargetOffloadPolicy == tgt_mandatory)
+      exit(1);
+  }
+
   DP("Done registering entries!\n");
 }
 
Index: openmp/libomptarget/include/omptarget.h
===================================================================
--- openmp/libomptarget/include/omptarget.h
+++ openmp/libomptarget/include/omptarget.h
@@ -119,12 +119,42 @@
   int32_t reserved; // Reserved, to be used by the runtime library.
 };
 
+/// __tgt_image_info:
+///
+/// The information in this struct is provided in the clang-linker-wrapper
+/// as a call to __tgt_register_image_info for each image in the library
+/// of images also created by the clang-linker-wrapper.
+/// __tgt_register_image_info is called for each image BEFORE the single
+/// call to __tgt_register_lib so that image information is available
+/// before they are loaded. clang-linker-wrapper gets this image information
+/// from command line arguments provided by the clang driver when it creates
+/// the call to the __clang-linker-wrapper command.
+/// This architecture allows the binary image (pointed to by ImageStart and
+/// ImageEnd in __tgt_device_image) to remain architecture independent.
+/// That is, the architecture independent part of the libomptarget runtime
+/// does not need to peer inside the image to determine if it is loadable
+/// even though in most cases the image is an elf object.
+/// There is one __tgt_image_info for each __tgt_device_image. For backward
+/// compatibility, no changes are allowed to either __tgt_device_image or
+/// __tgt_bin_desc. The absence of __tgt_image_info is the indication that
+/// the runtime is being used on a binary created by an old version of
+/// the compiler.
+///
+struct __tgt_image_info {
+  int32_t version;           // The version of this struct
+  int32_t image_number;      // Image number in image library starting from 0
+  int32_t number_images;     // Number of images, used for initial allocation
+  char *offload_arch;        // e.g. sm_30, sm_70, gfx906, includes features
+  char *compile_opts;        // reserved for future use
+};
+
 /// This struct is a record of the device image information
 struct __tgt_device_image {
   void *ImageStart;                  // Pointer to the target code start
   void *ImageEnd;                    // Pointer to the target code end
   __tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
   __tgt_offload_entry *EntriesEnd;   // End of table (non inclusive)
+  __tgt_image_info *ImageInfo;       // Metadata about the image
 };
 
 /// This struct is a record of all the host code that may be offloaded to a
@@ -136,6 +166,15 @@
   __tgt_offload_entry *HostEntriesEnd;   // End of table (non inclusive)
 };
 
+/// __tgt_active_offload_env
+///
+/// This structure is created by __tgt_get_active_offload_env and is used
+/// to determine compatibility of the images with the current environment
+/// that is "in play".
+struct __tgt_active_offload_env {
+char *capabilities; // string returned by offload-arch -c
+};
+
 /// This struct contains the offload entries identified by the target runtime
 struct __tgt_target_table {
   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries
Index: clang/tools/clang-linker-wrapper/OffloadWrapper.h
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.h
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.h
@@ -14,8 +14,9 @@
 
 /// Wraps the input device images into the module \p M as global symbols and
 /// registers the images with the OpenMP Offloading runtime libomptarget.
-llvm::Error wrapOpenMPBinaries(llvm::Module &M,
-                               llvm::ArrayRef<llvm::ArrayRef<char>> Images);
+llvm::Error
+wrapOpenMPBinaries(llvm::Module &M, llvm::ArrayRef<llvm::ArrayRef<char>> Images,
+                   llvm::ArrayRef<llvm::ArrayRef<char>> OffloadArchs);
 
 /// Wraps the input fatbinary image into the module \p M as global symbols and
 /// registers the images with the CUDA runtime.
Index: clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
+++ clang/tools/clang-linker-wrapper/OffloadWrapper.cpp
@@ -55,11 +55,35 @@
   return PointerType::getUnqual(getEntryTy(M));
 }
 
+// This matches the runtime struct definition of __tgt_image_info
+// declared in openmp/libomptarget/include/omptarget.h:
+// struct __tgt_image_info {
+//   int32_t version;
+//   int32_t image_number;
+//   int32_t number_images;
+//   char* offload_arch;
+//   char* compile_opts;
+// };
+StructType *getImageInfoTy(Module &M) {
+  LLVMContext &C = M.getContext();
+  StructType *ImageInfoTy = StructType::getTypeByName(C, "__tgt_image_info");
+  if (!ImageInfoTy)
+    ImageInfoTy = StructType::create(
+        "__tgt_image_info", Type::getInt32Ty(C), Type::getInt32Ty(C),
+        Type::getInt32Ty(C), Type::getInt8PtrTy(C), Type::getInt8PtrTy(C));
+  return ImageInfoTy;
+}
+
+PointerType *getImageInfoPtrTy(Module &M) {
+  return PointerType::getUnqual(getImageInfoTy(M));
+}
+
 // struct __tgt_device_image {
 //   void *ImageStart;
 //   void *ImageEnd;
 //   __tgt_offload_entry *EntriesBegin;
 //   __tgt_offload_entry *EntriesEnd;
+//   __tgt_image_info *ImageInfo;
 // };
 StructType *getDeviceImageTy(Module &M) {
   LLVMContext &C = M.getContext();
@@ -67,7 +91,7 @@
   if (!ImageTy)
     ImageTy = StructType::create("__tgt_device_image", Type::getInt8PtrTy(C),
                                  Type::getInt8PtrTy(C), getEntryPtrTy(M),
-                                 getEntryPtrTy(M));
+                                 getEntryPtrTy(M), getImageInfoPtrTy(M));
   return ImageTy;
 }
 
@@ -114,14 +138,16 @@
 ///     Image0,                            /*ImageStart*/
 ///     Image0 + sizeof(Image0),           /*ImageEnd*/
 ///     __start_omp_offloading_entries,    /*EntriesBegin*/
-///     __stop_omp_offloading_entries      /*EntriesEnd*/
+///     __stop_omp_offloading_entries,     /*EntriesEnd*/
+///     __tgt_image_info                   /*ImageInfo*/
 ///   },
 ///   ...
 ///   {
 ///     ImageN,                            /*ImageStart*/
 ///     ImageN + sizeof(ImageN),           /*ImageEnd*/
 ///     __start_omp_offloading_entries,    /*EntriesBegin*/
-///     __stop_omp_offloading_entries      /*EntriesEnd*/
+///     __stop_omp_offloading_entries,     /*EntriesEnd*/
+///     __tgt_image_info                   /*ImageInfo*/
 ///   }
 /// };
 ///
@@ -133,7 +159,8 @@
 /// };
 ///
 /// Global variable that represents BinDesc is returned.
-GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs) {
+GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
+                              ArrayRef<ArrayRef<char>> OffloadArchs) {
   LLVMContext &C = M.getContext();
   // Create external begin/end symbols for the offload entries table.
   auto *EntriesB = new GlobalVariable(
@@ -161,6 +188,11 @@
   auto *Zero = ConstantInt::get(getSizeTTy(M), 0u);
   Constant *ZeroZero[] = {Zero, Zero};
 
+  auto *NullPtr = llvm::ConstantPointerNull::get(Type::getInt8PtrTy(C));
+  unsigned int ImgCount = 0;
+  std::string OffloadArchBase = "__offload_arch";
+  std::string OffloadImageBase = "offload_image_info";
+
   // Create initializer for the images array.
   SmallVector<Constant *, 4u> ImagesInits;
   ImagesInits.reserve(Bufs.size());
@@ -179,8 +211,44 @@
     auto *ImageE =
         ConstantExpr::getGetElementPtr(Image->getValueType(), Image, ZeroSize);
 
-    ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(M), ImageB,
-                                              ImageE, EntriesB, EntriesE));
+    auto OArch = OffloadArchs[ImgCount];
+    Constant *OArchV = ConstantDataArray::get(C, OArch);
+    std::string OffloadArchGV(OffloadArchBase),
+        OffloadImageGV(OffloadImageBase);
+    if (ImgCount) {
+      auto Suffix = std::to_string(ImgCount);
+      OffloadArchGV.append(".").append(Suffix);
+      OffloadImageGV.append(".").append(Suffix);
+    }
+
+    auto *GV =
+        new GlobalVariable(M, OArchV->getType(), /*isConstant*/ true,
+                           GlobalValue::InternalLinkage, OArchV, OffloadArchGV);
+    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+    // store value of these variables (i.e. offload archs) into a custom
+    // section which will be used by "offload-arch -f". It won't be
+    // removed during binary stripping.
+    GV->setSection(".offload_arch_list");
+
+    auto *RequirementVPtr =
+        ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zero);
+    RequirementVPtr =
+        ConstantExpr::getBitCast(RequirementVPtr, Type::getInt8PtrTy(C));
+    auto *InfoInit = ConstantStruct::get(
+        getImageInfoTy(M), ConstantInt::get(Type::getInt32Ty(C), 1),
+        ConstantInt::get(Type::getInt32Ty(C), ImgCount++),
+        ConstantInt::get(Type::getInt32Ty(C), (uint32_t)OffloadArchs.size()),
+        RequirementVPtr,
+        NullPtr // TODO: capture target-compile-opts from clang driver
+    );
+    auto *ImageInfoGV =
+        new GlobalVariable(M, InfoInit->getType(),
+                           /*isConstant*/ true, GlobalValue::InternalLinkage,
+                           InfoInit, OffloadImageGV);
+    ImageInfoGV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+    ImagesInits.push_back(ConstantStruct::get(
+        getDeviceImageTy(M), ImageB, ImageE, EntriesB, EntriesE, ImageInfoGV));
   }
 
   // Then create images array.
@@ -531,8 +599,9 @@
 
 } // namespace
 
-Error wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images) {
-  GlobalVariable *Desc = createBinDesc(M, Images);
+Error wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images,
+                         ArrayRef<ArrayRef<char>> OffloadArchs) {
+  GlobalVariable *Desc = createBinDesc(M, Images, OffloadArchs);
   if (!Desc)
     return createStringError(inconvertibleErrorCode(),
                              "No binary descriptors created.");
Index: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
===================================================================
--- clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -610,6 +610,8 @@
   CmdArgs.push_back("gnu");
   CmdArgs.push_back("--no-undefined");
   CmdArgs.push_back("-shared");
+  std::string ArchArg = std::string("-plugin-opt=mcpu=").append(Arch.str());
+  CmdArgs.push_back(ArchArg);
   CmdArgs.push_back("-o");
   CmdArgs.push_back(TempFile);
 
@@ -1064,9 +1066,10 @@
     Triple TheTriple = Triple(File.TheTriple);
     auto &LinkerInputFiles = LinkerInput.getSecond();
     bool WholeProgram = false;
+    std::string TheArch = File.Arch;
 
     // Run LTO on any bitcode files and replace the input with the result.
-    if (Error Err = linkBitcodeFiles(LinkerInputFiles, TheTriple, File.Arch,
+    if (Error Err = linkBitcodeFiles(LinkerInputFiles, TheTriple, TheArch,
                                      WholeProgram))
       return Err;
 
@@ -1075,7 +1078,7 @@
       if (LinkerInputFiles.size() != 1 || !WholeProgram)
         return createStringError(inconvertibleErrorCode(),
                                  "Unable to embed bitcode image for JIT");
-      LinkedImages.emplace_back(OFK_OpenMP, TheTriple.getTriple(), File.Arch,
+      LinkedImages.emplace_back(OFK_OpenMP, TheTriple.getTriple(), TheArch,
                                 LinkerInputFiles.front());
       continue;
     }
@@ -1086,18 +1089,18 @@
         return createStringError(inconvertibleErrorCode(),
                                  "Invalid number of inputs for non-RDC mode");
       for (OffloadKind Kind : ActiveOffloadKinds[LinkerInput.getFirst()])
-        LinkedImages.emplace_back(Kind, TheTriple.getTriple(), File.Arch,
+        LinkedImages.emplace_back(Kind, TheTriple.getTriple(), TheArch,
                                   LinkerInputFiles.front());
       continue;
     }
 
-    auto ImageOrErr = linkDevice(LinkerInputFiles, TheTriple, File.Arch);
+    auto ImageOrErr = linkDevice(LinkerInputFiles, TheTriple, TheArch);
     if (!ImageOrErr)
       return ImageOrErr.takeError();
 
     // Create separate images for all the active offload kinds.
     for (OffloadKind Kind : ActiveOffloadKinds[LinkerInput.getFirst()])
-      LinkedImages.emplace_back(Kind, TheTriple.getTriple(), File.Arch,
+      LinkedImages.emplace_back(Kind, TheTriple.getTriple(), TheArch,
                                 *ImageOrErr);
   }
   return Error::success();
@@ -1147,6 +1150,9 @@
 Error wrapOpenMPImages(Module &M, ArrayRef<DeviceFile> Images) {
   SmallVector<std::unique_ptr<MemoryBuffer>, 4> SavedBuffers;
   SmallVector<ArrayRef<char>, 4> ImagesToWrap;
+  SmallVector<ArrayRef<char>, 4> OffloadArchs;
+  std::string Arch;
+  OffloadArchs.reserve(Images.size());
   for (const DeviceFile &File : Images) {
     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
         llvm::MemoryBuffer::getFileOrSTDIN(File.Filename);
@@ -1155,9 +1161,19 @@
     ImagesToWrap.emplace_back((*ImageOrError)->getBufferStart(),
                               (*ImageOrError)->getBufferSize());
     SavedBuffers.emplace_back(std::move(*ImageOrError));
+
+    if (Arch.empty()) {
+      Arch = std::string(File.Arch);
+      Arch.push_back('\0');
+      OffloadArchs.emplace_back(Arch.data(), Arch.size());
+    } else {
+      auto curSize = Arch.size();
+      Arch.append(File.Arch);
+      OffloadArchs.emplace_back(&(Arch.at(curSize)), File.Arch.size());
+    }
   }
 
-  if (Error Err = wrapOpenMPBinaries(M, ImagesToWrap))
+  if (Error Err = wrapOpenMPBinaries(M, ImagesToWrap, OffloadArchs))
     return Err;
   return Error::success();
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to