[Openmp-commits] [openmp] LazyInitPlugin (PR #76174)

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Thu Dec 21 11:30:31 PST 2023


https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/76174

- [Libomptarget] Rework image checking further
- [Libomptarget] Only initialize a plugin if a device image needs it


>From f92576ca7acbf2a645ba414b72e8ef3bccf1d320 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 20 Dec 2023 21:31:50 -0600
Subject: [PATCH 1/2] [Libomptarget] Rework image checking further

Summary:
In the future, we may have more checks for different kinds of inputs,
e.g. SPIR-V. This patch simply reworks the handling to be more generic
and do the magic detection up-front. The checks inside the routines are
now asserts so we don't spend time checking this stuff over and over
again.

This patch also tweaked the bitcode check. I used a different function
to get the Lazy-IR module now, as it returns the raw expected value
rather than the SM diganostic.

No functionality change intended.
---
 .../plugins-nextgen/common/include/JIT.h      |  2 +-
 .../common/include/PluginInterface.h          |  2 +-
 .../plugins-nextgen/common/src/JIT.cpp        | 24 ++++-----
 .../common/src/PluginInterface.cpp            | 52 +++++++++++--------
 .../plugins-nextgen/common/src/Utils/ELF.cpp  |  3 +-
 5 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/openmp/libomptarget/plugins-nextgen/common/include/JIT.h b/openmp/libomptarget/plugins-nextgen/common/include/JIT.h
index 3ec4424f856a00..b22197b8920838 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/JIT.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/JIT.h
@@ -57,7 +57,7 @@ struct JITEngine {
 
   /// Return true if \p Image is a bitcode image that can be JITed for the given
   /// architecture.
-  bool checkBitcodeImage(const __tgt_device_image &Image);
+  Expected<bool> checkBitcodeImage(StringRef Buffer) const;
 
 private:
   /// Compile the bitcode image \p Image and generate the binary image that can
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index cf02783d8b3388..b85dc146d86d2f 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -1067,7 +1067,7 @@ struct GenericPluginTy {
 
   /// Top level interface to verify if a given ELF image can be executed on a
   /// given target. Returns true if the \p Image is compatible with the plugin.
-  Expected<bool> checkELFImage(__tgt_device_image &Image) const;
+  Expected<bool> checkELFImage(StringRef Image) const;
 
   /// Indicate if an image is compatible with the plugin devices. Notice that
   /// this function may be called before actually initializing the devices. So
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp
index 08080c9d6091bb..7275be4edfca5b 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/JIT.cpp
@@ -330,24 +330,18 @@ JITEngine::process(const __tgt_device_image &Image,
   return &Image;
 }
 
-bool JITEngine::checkBitcodeImage(const __tgt_device_image &Image) {
+Expected<bool> JITEngine::checkBitcodeImage(StringRef Buffer) const {
   TimeTraceScope TimeScope("Check bitcode image");
 
-  if (!isImageBitcode(Image))
-    return false;
-
-  StringRef Data(reinterpret_cast<const char *>(Image.ImageStart),
-                 target::getPtrDiff(Image.ImageEnd, Image.ImageStart));
-  auto MB = MemoryBuffer::getMemBuffer(Data, /*BufferName=*/"",
-                                       /*RequiresNullTerminator=*/false);
-  if (!MB)
-    return false;
+  assert(identify_magic(Buffer) == file_magic::bitcode &&
+         "Input is not bitcode");
 
   LLVMContext Context;
-  SMDiagnostic Diagnostic;
-  std::unique_ptr<Module> M =
-      llvm::getLazyIRModule(std::move(MB), Diagnostic, Context,
-                            /*ShouldLazyLoadMetadata=*/true);
+  auto ModuleOrErr = getLazyBitcodeModule(MemoryBufferRef(Buffer, ""), Context,
+                                          /*ShouldLazyLoadMetadata=*/true);
+  if (!ModuleOrErr)
+    return ModuleOrErr.takeError();
+  Module &M = **ModuleOrErr;
 
-  return M && Triple(M->getTargetTriple()).getArch() == TT.getArch();
+  return Triple(M.getTargetTriple()).getArch() == TT.getArch();
 }
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 178c60a77ab51f..be9ace571f54fd 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1632,16 +1632,13 @@ Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
   return Plugin::success();
 }
 
-Expected<bool> GenericPluginTy::checkELFImage(__tgt_device_image &Image) const {
-  StringRef Buffer(reinterpret_cast<const char *>(Image.ImageStart),
-                   target::getPtrDiff(Image.ImageEnd, Image.ImageStart));
-
+Expected<bool> GenericPluginTy::checkELFImage(StringRef Image) const {
   // First check if this image is a regular ELF file.
-  if (!utils::elf::isELF(Buffer))
+  if (!utils::elf::isELF(Image))
     return false;
 
   // Check if this image is an ELF with a matching machine value.
-  auto MachineOrErr = utils::elf::checkMachine(Buffer, getMagicElfBits());
+  auto MachineOrErr = utils::elf::checkMachine(Image, getMagicElfBits());
   if (!MachineOrErr)
     return MachineOrErr.takeError();
 
@@ -1649,7 +1646,7 @@ Expected<bool> GenericPluginTy::checkELFImage(__tgt_device_image &Image) const {
     return false;
 
   // Perform plugin-dependent checks for the specific architecture if needed.
-  return isELFCompatible(Buffer);
+  return isELFCompatible(Image);
 }
 
 const bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
@@ -1678,27 +1675,38 @@ int32_t __tgt_rtl_init_plugin() {
   return OFFLOAD_SUCCESS;
 }
 
-int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *TgtImage) {
-  // TODO: We should be able to perform a trivial ELF machine check without
-  // initializing the plugin first to save time if the plugin is not needed.
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   if (!Plugin::isActive())
     return false;
 
-  // Check if this is a valid ELF with a matching machine and processor.
-  auto MatchOrErr = Plugin::get().checkELFImage(*TgtImage);
-  if (Error Err = MatchOrErr.takeError()) {
+  StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
+                   target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
+
+  auto HandleError = [&](Error Err) -> bool {
     [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
-    DP("Failure to check validity of image %p: %s", TgtImage, ErrStr.c_str());
+    DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str());
+    return false;
+  };
+  switch (identify_magic(Buffer)) {
+  case file_magic::elf:
+  case file_magic::elf_relocatable:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core: {
+    auto MatchOrErr = Plugin::get().checkELFImage(Buffer);
+    if (Error Err = MatchOrErr.takeError())
+      return HandleError(std::move(Err));
+    return *MatchOrErr;
+  }
+  case file_magic::bitcode: {
+    auto MatchOrErr = Plugin::get().getJIT().checkBitcodeImage(Buffer);
+    if (Error Err = MatchOrErr.takeError())
+      return HandleError(std::move(Err));
+    return *MatchOrErr;
+  }
+  default:
     return false;
-  } else if (*MatchOrErr) {
-    return true;
   }
-
-  // Check if this is a valid LLVM-IR file with matching triple.
-  if (Plugin::get().getJIT().checkBitcodeImage(*TgtImage))
-    return true;
-
-  return false;
 }
 
 int32_t __tgt_rtl_supports_empty_images() {
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp b/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
index bdac6c1db5d23a..c84c3bad5def0a 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -37,8 +37,7 @@ bool utils::elf::isELF(StringRef Buffer) {
 }
 
 Expected<bool> utils::elf::checkMachine(StringRef Object, uint16_t EMachine) {
-  if (!isELF(Object))
-    return createError("Input is not an ELF.");
+  assert(isELF(Object) && "Input is not an ELF!");
 
   Expected<ELF64LEObjectFile> ElfOrErr =
       ELF64LEObjectFile::create(MemoryBufferRef(Object, /*Identifier=*/""),

>From 31a6cb7e3e9aa4441cf1ece173f6872982fb515e Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 21 Dec 2023 11:41:40 -0600
Subject: [PATCH 2/2] [Libomptarget] Only initialize a plugin if a device image
 needs it

Summary:
This patch changes the image registration logic. Currently, we
initialize every single plugin even if no images will end up using it.
This patch adds a new pair of runtime calls. The first one checks if the
plugin has been initialized or is active. The second does a light-weight
check on the ELF machine to prevent it from needing to initialize a
plugin with a trivially incompatible image.

My machine has CUDA and AMDGPU installed. This patch saves roughly 20ms
when compiling only for the AMDGPU device with a trivial program. and
roughly 30ms when compiling only for the NVPTX device with a trivial
program.
---
 openmp/libomptarget/include/PluginManager.h   |  4 ++
 .../libomptarget/include/Shared/PluginAPI.h   | 15 ++++-
 .../libomptarget/include/Shared/PluginAPI.inc |  2 +
 .../plugins-nextgen/amdgpu/src/rtl.cpp        |  4 +-
 .../common/include/PluginInterface.h          |  5 +-
 .../common/src/PluginInterface.cpp            | 26 +++++++++
 .../plugins-nextgen/cuda/src/rtl.cpp          |  4 +-
 .../generic-elf-64bit/src/rtl.cpp             |  4 +-
 openmp/libomptarget/src/PluginManager.cpp     | 57 +++++++++++++------
 9 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/openmp/libomptarget/include/PluginManager.h b/openmp/libomptarget/include/PluginManager.h
index a0499c37504c0d..7128e14bdf6fc0 100644
--- a/openmp/libomptarget/include/PluginManager.h
+++ b/openmp/libomptarget/include/PluginManager.h
@@ -49,6 +49,10 @@ struct PluginAdaptorTy {
   /// that fail to initialize are ignored.
   void initDevices(PluginManager &PM);
 
+  /// Initialize as many devices as possible for this plugin adaptor. Devices
+  /// that fail to initialize are ignored.
+  llvm::Error initPlugin();
+
   bool isUsed() const { return DeviceOffset >= 0; }
 
   /// Return the number of devices visible to the underlying plugin.
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
index c6aacf4ce2124b..2a2eebb06bfea4 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ b/openmp/libomptarget/include/Shared/PluginAPI.h
@@ -20,14 +20,27 @@
 #include "Shared/APITypes.h"
 
 extern "C" {
-
 // First method called on the plugin
 int32_t __tgt_rtl_init_plugin();
 
+// Returns non-zero if the plugin has been initialized.
+int32_t __tgt_rtl_is_plugin_active();
+
 // Return the number of available devices of the type supported by the
 // target RTL.
 int32_t __tgt_rtl_number_of_devices(void);
 
+// Returns a non-zero integer if the given Image is capable of being run by the
+// plugin. This is intended to be called without initializing the plugin.
+int32_t __tgt_rtl_is_binary_compatible(__tgt_device_image *Image);
+
+// Return an integer different from zero if the provided device image can be
+// supported by the runtime. The functionality is similar to comparing the
+// result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
+// lightweight query to determine if the RTL is suitable for an image without
+// having to load the library, which can be expensive.
+int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
+
 // Return an integer different from zero if the provided device image can be
 // supported by the runtime. The functionality is similar to comparing the
 // result of __tgt__rtl__load__binary to NULL. However, this is meant to be a
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
index 25ebe7d437f9d1..7b24c459eda3b1 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ b/openmp/libomptarget/include/Shared/PluginAPI.inc
@@ -14,6 +14,8 @@
 // No include guards!
 
 PLUGIN_API_HANDLE(init_plugin, true);
+PLUGIN_API_HANDLE(is_plugin_active, true);
+PLUGIN_API_HANDLE(is_binary_compatible, true);
 PLUGIN_API_HANDLE(is_valid_binary, true);
 PLUGIN_API_HANDLE(is_data_exchangable, false);
 PLUGIN_API_HANDLE(number_of_devices, true);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index fe435a3f558557..6d9e2cb30da7fc 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -56,6 +56,8 @@
 #include "hsa/hsa_ext_amd.h"
 #endif
 
+extern const uint16_t llvm::omp::target::plugin::ELFMachine = ELF::EM_AMDGPU;
+
 namespace llvm {
 namespace omp {
 namespace target {
@@ -3013,7 +3015,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
   Triple::ArchType getTripleArch() const override { return Triple::amdgcn; }
 
   /// Get the ELF code for recognizing the compatible image binary.
-  uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
+  uint16_t getMagicElfBits() const override { return ELFMachine; }
 
   /// Check whether the image is compatible with an AMDGPU device.
   Expected<bool> isELFCompatible(StringRef Image) const override {
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index b85dc146d86d2f..012c4ccbd87db4 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -48,9 +48,12 @@
 namespace llvm {
 namespace omp {
 namespace target {
-
 namespace plugin {
 
+/// The plugin's native ELF architecture. This should be defined individually
+/// by each plugin, and ELF:EM_NONE if the ELF target is not applicable.
+extern const uint16_t ELFMachine;
+
 struct GenericPluginTy;
 struct GenericKernelTy;
 struct GenericDeviceTy;
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index be9ace571f54fd..9b63e6bd4b31b6 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1675,6 +1675,32 @@ int32_t __tgt_rtl_init_plugin() {
   return OFFLOAD_SUCCESS;
 }
 
+int32_t __tgt_rtl_is_plugin_active() { return Plugin::isActive(); }
+
+int32_t __tgt_rtl_is_binary_compatible(__tgt_device_image *Image) {
+  assert(!Plugin::isActive() && "Should not be called after initialization");
+
+  StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
+                   target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
+
+  switch (identify_magic(Buffer)) {
+  case file_magic::elf:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object: {
+    // Check if this image is an ELF with a matching machine value.
+    auto MachineOrErr = utils::elf::checkMachine(Buffer, ELFMachine);
+    if (Error Err = MachineOrErr.takeError()) {
+      consumeError(std::move(Err));
+      return false;
+    }
+    return *MachineOrErr;
+  }
+  default:
+    // Assume this image is compatible and check if it's a valid binary later.
+    return true;
+  }
+}
+
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
   if (!Plugin::isActive())
     return false;
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index b0dff917dd0be0..624340af5c1f93 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -29,6 +29,8 @@
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/Error.h"
 
+extern const uint16_t llvm::omp::target::plugin::ELFMachine = ELF::EM_CUDA;
+
 namespace llvm {
 namespace omp {
 namespace target {
@@ -1275,7 +1277,7 @@ struct CUDAPluginTy final : public GenericPluginTy {
   Error deinitImpl() override { return Plugin::success(); }
 
   /// Get the ELF code for recognizing the compatible image binary.
-  uint16_t getMagicElfBits() const override { return ELF::EM_CUDA; }
+  uint16_t getMagicElfBits() const override { return ELFMachine; }
 
   Triple::ArchType getTripleArch() const override {
     // TODO: I think we can drop the support for 32-bit NVPTX devices.
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
index 88b5236d31f482..499a3e37d44c62 100644
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -38,6 +38,8 @@
 #define TARGET_ELF_ID ELF::EM_NONE
 #endif
 
+extern const uint16_t llvm::omp::target::plugin::ELFMachine = TARGET_ELF_ID;
+
 namespace llvm {
 namespace omp {
 namespace target {
@@ -389,7 +391,7 @@ struct GenELF64PluginTy final : public GenericPluginTy {
   Error deinitImpl() override { return Plugin::success(); }
 
   /// Get the ELF code to recognize the compatible binary images.
-  uint16_t getMagicElfBits() const override { return TARGET_ELF_ID; }
+  uint16_t getMagicElfBits() const override { return ELFMachine; }
 
   /// This plugin does not support exchanging data between two devices.
   bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/openmp/libomptarget/src/PluginManager.cpp b/openmp/libomptarget/src/PluginManager.cpp
index da2e08180eead8..510dc54774ab7f 100644
--- a/openmp/libomptarget/src/PluginManager.cpp
+++ b/openmp/libomptarget/src/PluginManager.cpp
@@ -69,23 +69,6 @@ Error PluginAdaptorTy::init() {
 #include "Shared/PluginAPI.inc"
 #undef PLUGIN_API_HANDLE
 
-  // Remove plugin on failure to call optional init_plugin
-  int32_t Rc = init_plugin();
-  if (Rc != OFFLOAD_SUCCESS) {
-    return createStringError(inconvertibleErrorCode(),
-                             "Unable to initialize library '%s': %u!\n",
-                             Name.c_str(), Rc);
-  }
-
-  // No devices are supported by this RTL?
-  NumberOfPluginDevices = number_of_devices();
-  if (!NumberOfPluginDevices) {
-    return createStringError(inconvertibleErrorCode(),
-                             "No devices supported in this RTL\n");
-  }
-
-  DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(),
-     NumberOfPluginDevices);
   return Error::success();
 }
 
@@ -162,6 +145,28 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
      NumberOfPluginDevices);
 }
 
+Error PluginAdaptorTy::initPlugin() {
+  // Remove plugin on failure to call optional init_plugin
+  int32_t Rc = init_plugin();
+  if (Rc != OFFLOAD_SUCCESS) {
+    return createStringError(inconvertibleErrorCode(),
+                             "Unable to initialize library '%s': %u!\n",
+                             Name.c_str(), Rc);
+  }
+
+  // No devices are supported by this RTL?
+  NumberOfPluginDevices = number_of_devices();
+  if (!NumberOfPluginDevices) {
+    return createStringError(inconvertibleErrorCode(),
+                             "No devices supported in this RTL\n");
+  }
+
+  DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(),
+     NumberOfPluginDevices);
+
+  return Error::success();
+}
+
 void PluginManager::initAllPlugins() {
   for (auto &R : PluginAdaptors)
     R->initDevices(*this);
@@ -203,6 +208,24 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
   for (int32_t i = 0; i < Desc->NumDeviceImages; ++i)
     PM->addDeviceImage(*Desc, Desc->DeviceImages[i]);
 
+  // Initialize any needed plugins using the image metadata if needed.
+  for (auto &R : PM->pluginAdaptors()) {
+    if (R.is_plugin_active())
+      continue;
+
+    // We can skip initializing this image if there are no images for it.
+    for (DeviceImageTy &DI : PM->deviceImages()) {
+      if (R.is_binary_compatible(&DI.getExecutableImage())) {
+        if (Error Err = R.initPlugin()) {
+          [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+          DP("Failed to init plugin: %s", InfoMsg.c_str());
+        }
+
+        break;
+      }
+    }
+  }
+
   // Register the images with the RTLs that understand them, if any.
   for (DeviceImageTy &DI : PM->deviceImages()) {
     // Obtain the image and information that was previously extracted.



More information about the Openmp-commits mailing list