[Openmp-commits] [openmp] [Libomptarget] Remove __tgt_image_info and use the ELF directly (PR #75720)

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Sat Dec 16 15:03:14 PST 2023


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/75720

>From 749c7649f439231de520c998268e39ef7e79b3bb Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sat, 16 Dec 2023 16:48:22 -0600
Subject: [PATCH] [Libomptarget] Remove __tgt_image_info and use the ELF
 directly

Summary:
This patch reorganizes a lot of the code used to check for compatibility
with the current environment. The main bulk of this patch involves
moving from using a separate `__tgt_image_info` struct (which just
contains a string for the architecture) to instead simply checking this
information from the ELF directly. Checking information in the ELF is
very inexpensive as creating an ELF file is simply writing a base
pointer.

The main desire to do this was to reorganize everything into the ELF
image. We can then do the majority of these checks without first
initializing the plugin. A future patch will move the first ELF checks
to happen without initializing the plugin so we no longer need to
initialize and plugins that don't have needed images.

This patch also adds a lot more sanity checks for whether or not the ELF
is actually compatible, such as whether the image has a valid ABI, is
64-bit, is an executable, etc.
---
 openmp/libomptarget/include/DeviceImage.h     |   7 -
 openmp/libomptarget/include/Shared/APITypes.h |   5 -
 .../libomptarget/include/Shared/PluginAPI.h   |   6 -
 .../libomptarget/include/Shared/PluginAPI.inc |   1 -
 .../plugins-nextgen/amdgpu/src/rtl.cpp        |  15 +-
 .../amdgpu/utils/UtilitiesRTL.h               | 125 ++++-----
 .../common/include/PluginInterface.h          |   6 +-
 .../common/{src => include}/Utils/ELF.h       |  14 +-
 .../common/src/PluginInterface.cpp            |  62 ++---
 .../plugins-nextgen/common/src/Utils/ELF.cpp  | 255 +++++++++++++++---
 .../plugins-nextgen/cuda/src/rtl.cpp          |  10 +-
 .../generic-elf-64bit/src/rtl.cpp             |   6 +-
 openmp/libomptarget/src/DeviceImage.cpp       |   1 -
 openmp/libomptarget/src/PluginManager.cpp     |   9 +-
 openmp/libomptarget/src/omptarget.cpp         |   7 +-
 15 files changed, 335 insertions(+), 194 deletions(-)
 rename openmp/libomptarget/plugins-nextgen/common/{src => include}/Utils/ELF.h (75%)

diff --git a/openmp/libomptarget/include/DeviceImage.h b/openmp/libomptarget/include/DeviceImage.h
index 465bf970ef17fe..63b4b6d14e0ef4 100644
--- a/openmp/libomptarget/include/DeviceImage.h
+++ b/openmp/libomptarget/include/DeviceImage.h
@@ -30,20 +30,13 @@ class DeviceImageTy {
 
   __tgt_bin_desc *BinaryDesc;
   __tgt_device_image Image;
-  __tgt_image_info ImageInfo;
 
 public:
   DeviceImageTy(__tgt_bin_desc &BinaryDesc, __tgt_device_image &Image);
 
   __tgt_device_image &getExecutableImage() { return Image; }
-  __tgt_image_info &getImageInfo() { return ImageInfo; }
   __tgt_bin_desc &getBinaryDesc() { return *BinaryDesc; }
 
-  llvm::StringRef
-  getArch(llvm::StringRef DefaultArch = llvm::StringRef()) const {
-    return ImageInfo.Arch ? ImageInfo.Arch : DefaultArch;
-  }
-
   auto entries() { return llvm::make_pointee_range(OffloadEntries); }
 };
 
diff --git a/openmp/libomptarget/include/Shared/APITypes.h b/openmp/libomptarget/include/Shared/APITypes.h
index 8e2aee2deb2957..763a22f0a5e863 100644
--- a/openmp/libomptarget/include/Shared/APITypes.h
+++ b/openmp/libomptarget/include/Shared/APITypes.h
@@ -46,11 +46,6 @@ struct __tgt_device_info {
   void *Device = nullptr;
 };
 
-/// This struct contains information about a given image.
-struct __tgt_image_info {
-  const char *Arch;
-};
-
 /// This struct is a record of all the host code that may be offloaded to a
 /// target.
 struct __tgt_bin_desc {
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.h b/openmp/libomptarget/include/Shared/PluginAPI.h
index 41d1908da21532..c6aacf4ce2124b 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.h
+++ b/openmp/libomptarget/include/Shared/PluginAPI.h
@@ -35,12 +35,6 @@ int32_t __tgt_rtl_number_of_devices(void);
 // having to load the library, which can be expensive.
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image);
 
-// This provides the same functionality as __tgt_rtl_is_valid_binary except we
-// also use additional information to determine if the image is valid. This
-// allows us to determine if an image has a compatible architecture.
-int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *Image,
-                                       __tgt_image_info *Info);
-
 // Return an integer other than zero if the data can be exchaned from SrcDevId
 // to DstDevId. If it is data exchangable, the device plugin should provide
 // function to move data from source device to destination device directly.
diff --git a/openmp/libomptarget/include/Shared/PluginAPI.inc b/openmp/libomptarget/include/Shared/PluginAPI.inc
index 0949e4e593ddeb..25ebe7d437f9d1 100644
--- a/openmp/libomptarget/include/Shared/PluginAPI.inc
+++ b/openmp/libomptarget/include/Shared/PluginAPI.inc
@@ -15,7 +15,6 @@
 
 PLUGIN_API_HANDLE(init_plugin, true);
 PLUGIN_API_HANDLE(is_valid_binary, true);
-PLUGIN_API_HANDLE(is_valid_binary_info, false);
 PLUGIN_API_HANDLE(is_data_exchangable, false);
 PLUGIN_API_HANDLE(number_of_devices, true);
 PLUGIN_API_HANDLE(init_device, true);
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 0ffdabe5bcd420..347d7529e947b3 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -23,6 +23,7 @@
 #include "Shared/Debug.h"
 #include "Shared/Environment.h"
 #include "Shared/Utils.h"
+#include "Utils/ELF.h"
 
 #include "GlobalHandler.h"
 #include "OpenMP/OMPT/Callback.h"
@@ -3015,7 +3016,16 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
   uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }
 
   /// Check whether the image is compatible with an AMDGPU device.
-  Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
+  Expected<bool> isELFCompatible(StringRef Image) const override {
+    // Get the associated architecture from the ELF.
+    auto ProcessorOrErr = ::utils::elf::getProcessor(Image);
+    if (!ProcessorOrErr)
+      return ProcessorOrErr.takeError();
+
+    auto FlagsOrErr = ::utils::elf::getFlags(Image);
+    if (!FlagsOrErr)
+      return FlagsOrErr.takeError();
+
     for (hsa_agent_t Agent : KernelAgents) {
       std::string Target;
       auto Err = utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
@@ -3038,7 +3048,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
       if (Err)
         return std::move(Err);
 
-      if (!utils::isImageCompatibleWithEnv(Info, Target))
+      if (!utils::isImageCompatibleWithEnv(*ProcessorOrErr, *FlagsOrErr,
+                                           Target))
         return false;
     }
     return true;
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 289dbf8e3d09d1..1a2028ae7c24f9 100644
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -13,6 +13,7 @@
 #include <cstdint>
 
 #include "Shared/Debug.h"
+#include "Utils/ELF.h"
 
 #include "omptarget.h"
 
@@ -58,92 +59,58 @@ uint32_t getImplicitArgsSize(uint16_t Version) {
              : sizeof(AMDGPUImplicitArgsTy);
 }
 
-/// Parse a TargetID to get processor arch and feature map.
-/// Returns processor subarch.
-/// Returns TargetID features in \p FeatureMap argument.
-/// If the \p TargetID contains feature+, FeatureMap it to true.
-/// If the \p TargetID contains feature-, FeatureMap it to false.
-/// If the \p TargetID does not contain a feature (default), do not map it.
-StringRef parseTargetID(StringRef TargetID, StringMap<bool> &FeatureMap) {
-  if (TargetID.empty())
-    return llvm::StringRef();
-
-  auto ArchFeature = TargetID.split(":");
-  auto Arch = ArchFeature.first;
-  auto Features = ArchFeature.second;
-  if (Features.empty())
-    return Arch;
-
-  if (Features.contains("sramecc+")) {
-    FeatureMap.insert(std::pair<StringRef, bool>("sramecc", true));
-  } else if (Features.contains("sramecc-")) {
-    FeatureMap.insert(std::pair<StringRef, bool>("sramecc", false));
-  }
-  if (Features.contains("xnack+")) {
-    FeatureMap.insert(std::pair<StringRef, bool>("xnack", true));
-  } else if (Features.contains("xnack-")) {
-    FeatureMap.insert(std::pair<StringRef, bool>("xnack", false));
-  }
-
-  return Arch;
-}
-
-/// Check if an image is compatible with current system's environment.
-bool isImageCompatibleWithEnv(const __tgt_image_info *Info,
-                              StringRef EnvTargetID) {
-  llvm::StringRef ImageTargetID(Info->Arch);
-
-  // Compatible in case of exact match.
-  if (ImageTargetID == EnvTargetID) {
-    DP("Compatible: Exact match \t[Image: %s]\t:\t[Env: %s]\n",
-       ImageTargetID.data(), EnvTargetID.data());
-    return true;
-  }
-
-  // Incompatible if Archs mismatch.
-  StringMap<bool> ImgMap, EnvMap;
-  StringRef ImgArch = utils::parseTargetID(ImageTargetID, ImgMap);
-  StringRef EnvArch = utils::parseTargetID(EnvTargetID, EnvMap);
-
-  // Both EnvArch and ImgArch can't be empty here.
-  if (EnvArch.empty() || ImgArch.empty() || !ImgArch.contains(EnvArch)) {
-    DP("Incompatible: Processor mismatch \t[Image: %s]\t:\t[Env: %s]\n",
-       ImageTargetID.data(), EnvTargetID.data());
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specified as '+' or '-' it is assumed to be 'any',
+/// which is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+                                     StringRef EnvTargetID) {
+  StringRef EnvArch = EnvTargetID.split(":").first;
+
+  // Trivial check if the base processors match.
+  if (EnvArch != ImageArch)
     return false;
-  }
 
-  // Incompatible if image has more features than the environment,
-  // irrespective of type or sign of features.
-  if (ImgMap.size() > EnvMap.size()) {
-    DP("Incompatible: Image has more features than the Environment \t[Image: "
-       "%s]\t:\t[Env: %s]\n",
-       ImageTargetID.data(), EnvTargetID.data());
-    return false;
+  // Check if the image is requesting xnack on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+    // The image is 'xnack-' so the environment cannot be 'xnack+'.
+    if (EnvTargetID.contains("xnack+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+    // The image is 'xnack+' so the environment cannot be 'xnack-'.
+    if (EnvTargetID.contains("xnack-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+  default:
+    break;
   }
 
-  // Compatible if each target feature specified by the environment is
-  // compatible with target feature of the image. The target feature is
-  // compatible if the iamge does not specify it (meaning Any), or if it
-  // specifies it with the same value (meaning On or Off).
-  for (const auto &ImgFeature : ImgMap) {
-    auto EnvFeature = EnvMap.find(ImgFeature.first());
-    if (EnvFeature == EnvMap.end() ||
-        (EnvFeature->first() == ImgFeature.first() &&
-         EnvFeature->second != ImgFeature.second)) {
-      DP("Incompatible: Value of Image's non-ANY feature is not matching with "
-         "the Environment's non-ANY feature \t[Image: %s]\t:\t[Env: %s]\n",
-         ImageTargetID.data(), EnvTargetID.data());
+  // Check if the image is requesting sramecc on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+    // The image is 'sramecc-' so the environment cannot be 'sramecc+'.
+    if (EnvTargetID.contains("sramecc+"))
       return false;
-    }
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+    // The image is 'sramecc+' so the environment cannot be 'sramecc-'.
+    if (EnvTargetID.contains("sramecc-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+    break;
   }
 
-  // Image is compatible if all features of Environment are:
-  //   - either, present in the Image's features map with the same sign,
-  //   - or, the feature is missing from Image's features map i.e. it is
-  //   set to ANY
-  DP("Compatible: Target IDs are compatible \t[Image: %s]\t:\t[Env: %s]\n",
-     ImageTargetID.data(), EnvTargetID.data());
-
   return true;
 }
 
diff --git a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
index ab6c457fba7864..103635f07b9b3f 100644
--- a/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/PluginInterface.h
@@ -1062,10 +1062,14 @@ struct GenericPluginTy {
     return isValidDeviceId(SrcDeviceId) && isValidDeviceId(DstDeviceId);
   }
 
+  /// Top level interface to verify if a given ELF image can be executed on a
+  /// given target. Returns true if the \p Image is compatible with the plugin.
+  Expected<bool> checkELFImage(__tgt_device_image &Image) const;
+
   /// Indicate if an image is compatible with the plugin devices. Notice that
   /// this function may be called before actually initializing the devices. So
   /// we could not move this function into GenericDeviceTy.
-  virtual Expected<bool> isImageCompatible(__tgt_image_info *Info) const = 0;
+  virtual Expected<bool> isELFCompatible(StringRef Image) const = 0;
 
   /// Indicate whether the plugin supports empty images.
   virtual bool supportsEmptyImages() const { return false; }
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.h b/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
similarity index 75%
rename from openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.h
rename to openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
index 7b58cbaf59acef..068be1e46adba9 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.h
+++ b/openmp/libomptarget/plugins-nextgen/common/include/Utils/ELF.h
@@ -21,9 +21,17 @@
 namespace utils {
 namespace elf {
 
-/// Return non-zero, if the given \p image is an ELF object, which
-/// e_machine matches \p target_id; return zero otherwise.
-int32_t checkMachine(__tgt_device_image *Image, uint16_t TargetId);
+/// Returns true or false if the \p Buffer is an ELF file.
+bool isELF(llvm::StringRef Buffer);
+
+/// Checks if the given \p Object is a valid ELF matching the e_machine value.
+llvm::Expected<bool> checkMachine(llvm::StringRef Object, uint16_t EMachine);
+
+/// Returns the processor string associated with the ELF flags.
+llvm::Expected<llvm::StringRef> getProcessor(llvm::StringRef Object);
+
+/// Get the associated flags from the ELF header.
+llvm::Expected<uint32_t> getFlags(llvm::StringRef Object);
 
 /// Returns a pointer to the given \p Symbol inside of an ELF object.
 llvm::Expected<const void *> getSymbolAddress(
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
index 3c7d1ca8998787..3b07caca9d0def 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp
@@ -1626,6 +1626,26 @@ Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
   return Plugin::success();
 }
 
+Expected<bool> GenericPluginTy::checkELFImage(__tgt_device_image &Image) const {
+  StringRef Buffer(reinterpret_cast<const char *>(Image.ImageStart),
+                   target::getPtrDiff(Image.ImageEnd, Image.ImageStart));
+
+  // First check if this image is a regular ELF file.
+  if (!utils::elf::isELF(Buffer))
+    return false;
+
+  // Check if this image is an ELF with a matching machine value.
+  auto MachineOrErr = utils::elf::checkMachine(Buffer, getMagicElfBits());
+  if (!MachineOrErr)
+    return MachineOrErr.takeError();
+
+  if (!*MachineOrErr)
+    return false;
+
+  // Perform plugin-dependent checks for the specific architecture if needed.
+  return isELFCompatible(Buffer);
+}
+
 const bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
 #ifdef LIBOMPTARGET_RPC_SUPPORT
   return true;
@@ -1653,44 +1673,26 @@ int32_t __tgt_rtl_init_plugin() {
 }
 
 int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *TgtImage) {
+  // TODO: We should be able to perform a trivial ELF machine check without
+  // initializing the plugin first to save time if the plugin is not needed.
   if (!Plugin::isActive())
     return false;
 
-  if (utils::elf::checkMachine(TgtImage, Plugin::get().getMagicElfBits()))
-    return true;
-
-  return Plugin::get().getJIT().checkBitcodeImage(*TgtImage);
-}
-
-int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
-                                       __tgt_image_info *Info) {
-  if (!Plugin::isActive())
-    return false;
-
-  if (!__tgt_rtl_is_valid_binary(TgtImage))
+  // Check if this is a valid ELF with a matching machine and processor.
+  auto MatchOrErr = Plugin::get().checkELFImage(*TgtImage);
+  if (Error Err = MatchOrErr.takeError()) {
+    [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
+    DP("Failure to check validity of image %p: %s\n", TgtImage, ErrStr.c_str());
     return false;
-
-  // A subarchitecture was not specified. Assume it is compatible.
-  if (!Info->Arch)
+  } else if (*MatchOrErr) {
     return true;
-
-  // Check the compatibility with all the available devices. Notice the
-  // devices may not be initialized yet.
-  auto CompatibleOrErr = Plugin::get().isImageCompatible(Info);
-  if (!CompatibleOrErr) {
-    // This error should not abort the execution, so we just inform the user
-    // through the debug system.
-    std::string ErrString = toString(CompatibleOrErr.takeError());
-    DP("Failure to check whether image %p is valid: %s\n", TgtImage,
-       ErrString.data());
-    return false;
   }
 
-  bool Compatible = *CompatibleOrErr;
-  DP("Image is %scompatible with current environment: %s\n",
-     (Compatible) ? "" : "not", Info->Arch);
+  // Check if this is a valid LLVM-IR file with matching triple.
+  if (Plugin::get().getJIT().checkBitcodeImage(*TgtImage))
+    return true;
 
-  return Compatible;
+  return false;
 }
 
 int32_t __tgt_rtl_supports_empty_images() {
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp b/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
index 305ea7d9c874b4..7c988e88e70989 100644
--- a/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ELF.h"
+#include "Utils/ELF.h"
 
 #include "Shared/APITypes.h"
 #include "Shared/Debug.h"
@@ -26,52 +26,228 @@ using namespace llvm;
 using namespace llvm::ELF;
 using namespace llvm::object;
 
-/// If the given range of bytes [\p BytesBegin, \p BytesEnd) represents
-/// a valid ELF, then invoke \p Callback on the ELFObjectFileBase
-/// created from this range, otherwise, return 0.
-/// If \p Callback is invoked, then return whatever value \p Callback returns.
-template <typename F>
-static int32_t withBytesAsElf(char *BytesBegin, char *BytesEnd, F Callback) {
-  size_t Size = BytesEnd - BytesBegin;
-  StringRef StrBuf(BytesBegin, Size);
-
-  auto Magic = identify_magic(StrBuf);
-  if (Magic != file_magic::elf && Magic != file_magic::elf_relocatable &&
-      Magic != file_magic::elf_executable &&
-      Magic != file_magic::elf_shared_object && Magic != file_magic::elf_core) {
-    DP("Not an ELF image!\n");
-    return 0;
+bool utils::elf::isELF(StringRef Buffer) {
+  switch (identify_magic(Buffer)) {
+  case file_magic::elf:
+  case file_magic::elf_relocatable:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core:
+    return true;
+  default:
+    return false;
   }
+}
 
-  std::unique_ptr<MemoryBuffer> MemBuf =
-      MemoryBuffer::getMemBuffer(StrBuf, "", false);
-  Expected<std::unique_ptr<ObjectFile>> BinOrErr =
-      ObjectFile::createELFObjectFile(MemBuf->getMemBufferRef(),
-                                      /*InitContent=*/false);
-  if (!BinOrErr) {
-    DP("Unable to get ELF handle: %s!\n",
-       toString(BinOrErr.takeError()).c_str());
-    return 0;
+Expected<bool> utils::elf::checkMachine(StringRef Object, uint16_t EMachine) {
+  if (!isELF(Object))
+    return createError("Input is not an ELF.");
+
+  Expected<ELF64LEObjectFile> ElfOrErr =
+      ELF64LEObjectFile::create(MemoryBufferRef(Object, /*Identifier=*/""),
+                                /*InitContent=*/false);
+  if (!ElfOrErr)
+    return ElfOrErr.takeError();
+
+  const auto Header = ElfOrErr->getELFFile().getHeader();
+  if (Header.e_ident[EI_CLASS] != ELFCLASS64)
+    return createError("Only 64-bit ELF files are supported");
+  if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
+    return createError("Only executable ELF files are supported");
+
+  if (Header.e_machine == EM_AMDGPU) {
+    if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
+      return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
+    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
+        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5)
+      return createError("Invalid AMD ABI version, must be version 4 or 5");
+    if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
+        (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
+      return createError("Unsupported AMDGPU architecture");
+  } else if (Header.e_machine == EM_CUDA) {
+    if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
+      return createError("Invalid CUDA addressing mode");
+    if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35 ||
+        (Header.e_flags & EF_CUDA_SM) > EF_CUDA_SM90)
+      return createError("Unsupported NVPTX architecture");
   }
 
-  auto *Object = dyn_cast<const ELFObjectFileBase>(BinOrErr->get());
+  return Header.e_machine == EMachine;
+}
+
+static Expected<StringRef> getNVPTXProcessor(uint32_t Flags) {
+  switch (Flags & EF_CUDA_SM) {
+  case EF_CUDA_SM20:
+    return "sm_20";
+  case EF_CUDA_SM21:
+    return "sm_21";
+  case EF_CUDA_SM30:
+    return "sm_30";
+  case EF_CUDA_SM32:
+    return "sm_32";
+  case EF_CUDA_SM35:
+    return "sm_35";
+  case EF_CUDA_SM37:
+    return "sm_37";
+  case EF_CUDA_SM50:
+    return "sm_50";
+  case EF_CUDA_SM52:
+    return "sm_52";
+  case EF_CUDA_SM53:
+    return "sm_53";
+  case EF_CUDA_SM60:
+    return "sm_60";
+  case EF_CUDA_SM61:
+    return "sm_61";
+  case EF_CUDA_SM62:
+    return "sm_62";
+  case EF_CUDA_SM70:
+    return "sm_70";
+  case EF_CUDA_SM72:
+    return "sm_72";
+  case EF_CUDA_SM75:
+    return "sm_75";
+  case EF_CUDA_SM80:
+    return "sm_80";
+  case EF_CUDA_SM86:
+    return "sm_86";
+  case EF_CUDA_SM87:
+    return "sm_87";
+  case EF_CUDA_SM89:
+    return "sm_89";
+  case EF_CUDA_SM90:
+    return Flags & EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+  default:
+    return createError("Unknown CUDA architecture");
+  }
+}
 
-  if (!Object) {
-    DP("Unknown ELF format!\n");
-    return 0;
+static Expected<StringRef> getAMDGPUProcessor(uint32_t Flags) {
+  switch (Flags & EF_AMDGPU_MACH) {
+  case EF_AMDGPU_MACH_AMDGCN_GFX600:
+    return "gfx600";
+  case EF_AMDGPU_MACH_AMDGCN_GFX601:
+    return "gfx601";
+  case EF_AMDGPU_MACH_AMDGCN_GFX602:
+    return "gfx602";
+  case EF_AMDGPU_MACH_AMDGCN_GFX700:
+    return "gfx700";
+  case EF_AMDGPU_MACH_AMDGCN_GFX701:
+    return "gfx701";
+  case EF_AMDGPU_MACH_AMDGCN_GFX702:
+    return "gfx702";
+  case EF_AMDGPU_MACH_AMDGCN_GFX703:
+    return "gfx703";
+  case EF_AMDGPU_MACH_AMDGCN_GFX704:
+    return "gfx704";
+  case EF_AMDGPU_MACH_AMDGCN_GFX705:
+    return "gfx705";
+  case EF_AMDGPU_MACH_AMDGCN_GFX801:
+    return "gfx801";
+  case EF_AMDGPU_MACH_AMDGCN_GFX802:
+    return "gfx802";
+  case EF_AMDGPU_MACH_AMDGCN_GFX803:
+    return "gfx803";
+  case EF_AMDGPU_MACH_AMDGCN_GFX805:
+    return "gfx805";
+  case EF_AMDGPU_MACH_AMDGCN_GFX810:
+    return "gfx810";
+  case EF_AMDGPU_MACH_AMDGCN_GFX900:
+    return "gfx900";
+  case EF_AMDGPU_MACH_AMDGCN_GFX902:
+    return "gfx902";
+  case EF_AMDGPU_MACH_AMDGCN_GFX904:
+    return "gfx904";
+  case EF_AMDGPU_MACH_AMDGCN_GFX906:
+    return "gfx906";
+  case EF_AMDGPU_MACH_AMDGCN_GFX908:
+    return "gfx908";
+  case EF_AMDGPU_MACH_AMDGCN_GFX909:
+    return "gfx909";
+  case EF_AMDGPU_MACH_AMDGCN_GFX90C:
+    return "gfx90c";
+  case EF_AMDGPU_MACH_AMDGCN_GFX90A:
+    return "gfx90a";
+  case EF_AMDGPU_MACH_AMDGCN_GFX940:
+    return "gfx940";
+  case EF_AMDGPU_MACH_AMDGCN_GFX941:
+    return "gfx941";
+  case EF_AMDGPU_MACH_AMDGCN_GFX942:
+    return "gfx942";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1010:
+    return "gfx1010";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1011:
+    return "gfx1011";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1012:
+    return "gfx1012";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1030:
+    return "gfx1030";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1031:
+    return "gfx1031";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1032:
+    return "gfx1032";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1033:
+    return "gfx1033";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1035:
+    return "gfx1035";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1034:
+    return "gfx1034";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1100:
+    return "gfx1100";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1013:
+    return "gfx1013";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1150:
+    return "gfx1150";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1103:
+    return "gfx1103";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1036:
+    return "gfx1036";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1101:
+    return "gfx1101";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1102:
+    return "gfx1102";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1200:
+    return "gfx1200";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1151:
+    return "gfx1151";
+  case EF_AMDGPU_MACH_AMDGCN_GFX1201:
+    return "gfx1201";
+  default:
+    return createError("Unknown AMDGPU architecture");
   }
+}
 
-  return Callback(Object);
+Expected<StringRef> utils::elf::getProcessor(StringRef Object) {
+  if (!isELF(Object))
+    return createError("Input is not an ELF.");
+
+  Expected<ELF64LEObjectFile> ElfOrErr =
+      ELF64LEObjectFile::create(MemoryBufferRef(Object, /*Identifier=*/""),
+                                /*InitContent=*/false);
+  if (!ElfOrErr)
+    return ElfOrErr.takeError();
+
+  const auto Header = ElfOrErr->getELFFile().getHeader();
+  if (Header.e_machine == EM_CUDA)
+    return getNVPTXProcessor(Header.e_flags);
+
+  if (Header.e_machine == EM_AMDGPU) {
+    return getAMDGPUProcessor(Header.e_flags);
+  }
+  return "";
 }
 
-// Check whether an image is valid for execution on target_id
-int32_t utils::elf::checkMachine(__tgt_device_image *Image, uint16_t TargetId) {
-  auto CheckMachine = [TargetId](const ELFObjectFileBase *Object) {
-    return TargetId == Object->getEMachine();
-  };
-  return withBytesAsElf(reinterpret_cast<char *>(Image->ImageStart),
-                        reinterpret_cast<char *>(Image->ImageEnd),
-                        CheckMachine);
+Expected<uint32_t> utils::elf::getFlags(StringRef Object) {
+  if (!isELF(Object))
+    return createError("Input is not an ELF.");
+
+  Expected<ELF64LEObjectFile> ElfOrErr =
+      ELF64LEObjectFile::create(MemoryBufferRef(Object, /*Identifier=*/""),
+                                /*InitContent=*/false);
+  if (!ElfOrErr)
+    return ElfOrErr.takeError();
+
+  return ElfOrErr->getELFFile().getHeader().e_flags;
 }
 
 template <class ELFT>
@@ -272,7 +448,8 @@ Expected<const void *> utils::elf::getSymbolAddress(
     return SecOrErr.takeError();
   const auto &Section = *SecOrErr;
 
-  // A section with SHT_NOBITS occupies no space in the file and has no offset.
+  // A section with SHT_NOBITS occupies no space in the file and has no
+  // offset.
   if (Section->sh_type == ELF::SHT_NOBITS)
     return createError(
         "invalid sh_type for symbol lookup, cannot be SHT_NOBITS");
diff --git a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
index 7bad411b9d8e3b..15a014821a24b5 100644
--- a/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp
@@ -22,6 +22,7 @@
 #include "GlobalHandler.h"
 #include "OpenMP/OMPT/Callback.h"
 #include "PluginInterface.h"
+#include "Utils/ELF.h"
 
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -1284,7 +1285,13 @@ struct CUDAPluginTy final : public GenericPluginTy {
   }
 
   /// Check whether the image is compatible with the available CUDA devices.
-  Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
+  Expected<bool> isELFCompatible(StringRef Image) const override {
+    // Get the associated architecture from the ELF.
+    auto ProcessorOrErr = utils::elf::getProcessor(Image);
+    if (!ProcessorOrErr)
+      return ProcessorOrErr.takeError();
+    StringRef ArchStr = *ProcessorOrErr;
+
     for (int32_t DevId = 0; DevId < getNumDevices(); ++DevId) {
       CUdevice Device;
       CUresult Res = cuDeviceGet(&Device, DevId);
@@ -1302,7 +1309,6 @@ struct CUDAPluginTy final : public GenericPluginTy {
       if (auto Err = Plugin::check(Res, "Error in cuDeviceGetAttribute: %s"))
         return std::move(Err);
 
-      StringRef ArchStr(Info->Arch);
       StringRef PrefixStr("sm_");
       if (!ArchStr.startswith(PrefixStr))
         return Plugin::error("Unrecognized image arch %s", ArchStr.data());
diff --git a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
index e1706cfb2cbf1e..88b5236d31f482 100644
--- a/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp
@@ -35,7 +35,7 @@
 
 // The ELF ID should be defined at compile-time by the build system.
 #ifndef TARGET_ELF_ID
-#define TARGET_ELF_ID 0
+#define TARGET_ELF_ID ELF::EM_NONE
 #endif
 
 namespace llvm {
@@ -397,9 +397,7 @@ struct GenELF64PluginTy final : public GenericPluginTy {
   }
 
   /// All images (ELF-compatible) should be compatible with this plugin.
-  Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
-    return true;
-  }
+  Expected<bool> isELFCompatible(StringRef) const override { return true; }
 
   Triple::ArchType getTripleArch() const override {
     return Triple::LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE;
diff --git a/openmp/libomptarget/src/DeviceImage.cpp b/openmp/libomptarget/src/DeviceImage.cpp
index 910e1907dcfe6d..1d39bb9ab8da65 100644
--- a/openmp/libomptarget/src/DeviceImage.cpp
+++ b/openmp/libomptarget/src/DeviceImage.cpp
@@ -50,5 +50,4 @@ DeviceImageTy::DeviceImageTy(__tgt_bin_desc &BinaryDesc,
       static_cast<const void *>(Binary->getImage().bytes_end()));
 
   Image = __tgt_device_image{Begin, End, Image.EntriesBegin, Image.EntriesEnd};
-  ImageInfo = __tgt_image_info{Binary->getArch().data()};
 }
diff --git a/openmp/libomptarget/src/PluginManager.cpp b/openmp/libomptarget/src/PluginManager.cpp
index 34a0d1dcefa526..da2e08180eead8 100644
--- a/openmp/libomptarget/src/PluginManager.cpp
+++ b/openmp/libomptarget/src/PluginManager.cpp
@@ -207,20 +207,13 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) {
   for (DeviceImageTy &DI : PM->deviceImages()) {
     // Obtain the image and information that was previously extracted.
     __tgt_device_image *Img = &DI.getExecutableImage();
-    __tgt_image_info *Info = &DI.getImageInfo();
 
     PluginAdaptorTy *FoundRTL = nullptr;
 
     // Scan the RTLs that have associated images until we find one that supports
     // the current image.
     for (auto &R : PM->pluginAdaptors()) {
-      if (R.is_valid_binary_info) {
-        if (!R.is_valid_binary_info(Img, Info)) {
-          DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
-             DPxPTR(Img->ImageStart), R.Name.c_str());
-          continue;
-        }
-      } else if (!R.is_valid_binary(Img)) {
+      if (!R.is_valid_binary(Img)) {
         DP("Image " DPxMOD " is NOT compatible with RTL %s!\n",
            DPxPTR(Img->ImageStart), R.Name.c_str());
         continue;
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 0d16a41c7616c7..e724b2f6db8b5f 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -308,14 +308,9 @@ void handleTargetOutcome(bool Success, ident_t *Loc) {
         FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html "
                         "for debugging options.\n");
 
-      if (!PM->getNumUsedPlugins()) {
-        llvm::SmallVector<llvm::StringRef> Archs;
-        llvm::transform(PM->deviceImages(), std::back_inserter(Archs),
-                        [](const auto &X) { return X.getArch("empty"); });
+      if (!PM->getNumUsedPlugins())
         FAILURE_MESSAGE(
             "No images found compatible with the installed hardware. ");
-        fprintf(stderr, "Found (%s)\n", llvm::join(Archs, ",").c_str());
-      }
 
       SourceInfo Info(Loc);
       if (Info.isAvailible())



More information about the Openmp-commits mailing list