[Openmp-commits] [openmp] c63dced - [OpenMP][JIT] Introduce support for AMDGPU

Wed Jan 4 10:15:00 PST 2023

Author: Johannes Doerfert
Date: 2023-01-04T10:14:27-08:00
New Revision: c63dced93b46ec3ab925d9049f2a8c8901f5d913

URL: https://github.com/llvm/llvm-project/commit/c63dced93b46ec3ab925d9049f2a8c8901f5d913
DIFF: https://github.com/llvm/llvm-project/commit/c63dced93b46ec3ab925d9049f2a8c8901f5d913.diff

LOG: [OpenMP][JIT] Introduce support for AMDGPU

To JIT kernels for AMDGPUs we need to provide the architecture, the
triple, and a post-link callback. The first two are simple; the last one
is a little more complicated since we need to invoke `lld`. There is a
library interface, but it requires the lld library, which is not
generally available, so we go with the executable for now. Either way,
we need to write the (amdgcn) object file to disk and read the linker
output back from another file. We should try to avoid that in the future.
The options for `lld` are copied from the way clang invokes it.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D140720

Added: 
    

Modified: 
    openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
index 10477aee829f0..28e70341fe1fc 100644

--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -18,6 +18,7 @@
 #include <hsa_ext_amd.h>
 #include <mutex>
 #include <string>
+#include <system_error>
 #include <unistd.h>
 #include <unordered_map>
 
@@ -28,10 +29,17 @@
 #include "Utilities.h"
 #include "UtilitiesRTL.h"
 
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 namespace omp {
@@ -1519,6 +1527,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     if (auto Err = initMemoryPools())
       return Err;
 
+    char GPUName[64];
+    if (auto Err = getDeviceAttr(HSA_AGENT_INFO_NAME, GPUName))
+      return Err;
+    Arch = GPUName;
+
     // Get the wavefront size.
     uint32_t WavefrontSize = 0;
     if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize))
@@ -1626,6 +1639,61 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     return Plugin::success();
   }
 
+  /// Link the JITed amdgcn object held in \p MB into a loadable image.
+  ///
+  /// The object is written to a temporary file, the `lld` executable found on
+  /// the PATH is run on it, and the linker output is read back into memory.
+  /// Both temporary files are removed before returning, on success and on
+  /// failure alike.
+  ///
+  /// \returns the linked image, or an error if a temporary file could not be
+  /// created, written, or read, if `lld` was not found, or if linking failed.
+  Expected<std::unique_ptr<MemoryBuffer>>
+  doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {
+    // TODO: We should try to avoid materialization but there seems to be no
+    // good linker interface w/o file i/o.
+    SmallString<128> LinkerInputFilePath, LinkerOutputFilePath;
+
+    // Build an error after removing whatever temporary files exist so far.
+    // Removing a path that was never created is not treated as a failure.
+    auto Fail = [&](std::error_code Code, const std::string &Msg) {
+      (void)sys::fs::remove(LinkerInputFilePath);
+      (void)sys::fs::remove(LinkerOutputFilePath);
+      return createStringError(Code, Msg);
+    };
+
+    // Reserve both names through createTemporaryFile instead of deriving one
+    // from the other. The suffix is passed without a leading dot because the
+    // separator is inserted by createTemporaryFile itself.
+    std::error_code EC = sys::fs::createTemporaryFile(
+        "amdgpu-pre-link-jit", "o", LinkerInputFilePath);
+    if (!EC)
+      EC = sys::fs::createTemporaryFile("amdgpu-pre-link-jit", "out",
+                                        LinkerOutputFilePath);
+    if (EC)
+      return Fail(EC, "Failed to create temporary file for linker");
+
+    // Paths are passed as SmallString/StringRef rather than via data(), which
+    // is not guaranteed to be null-terminated at the logical size.
+    raw_fd_ostream OS(LinkerInputFilePath, EC);
+    if (EC)
+      return Fail(EC, "Failed to open temporary file for linker");
+    OS.write(MB->getBufferStart(), MB->getBufferSize());
+    OS.close();
+    if (OS.has_error()) {
+      EC = OS.error();
+      // An uncleared stream error is fatal when the stream is destroyed.
+      OS.clear_error();
+      return Fail(EC, "Failed to write temporary file for linker");
+    }
+
+    const auto &ErrorOrPath = sys::findProgramByName("lld");
+    if (!ErrorOrPath)
+      return Fail(inconvertibleErrorCode(),
+                  "Failed to find `lld` on the PATH.");
+
+    std::string LLDPath = ErrorOrPath.get();
+    INFO(OMP_INFOTYPE_PLUGIN_KERNEL, getDeviceId(),
+         "Using `%s` to link JITed amdgcn output.", LLDPath.c_str());
+
+    std::string MCPU = "-plugin-opt=mcpu=" + getArch();
+
+    StringRef Args[] = {LLDPath,
+                        "-flavor",
+                        "gnu",
+                        "--no-undefined",
+                        "-shared",
+                        MCPU,
+                        "-o",
+                        LinkerOutputFilePath,
+                        LinkerInputFilePath};
+
+    std::string Error;
+    int RC = sys::ExecuteAndWait(LLDPath, Args, std::nullopt, {}, 0, 0, &Error);
+    if (RC)
+      return Fail(inconvertibleErrorCode(),
+                  "Linking optimized bitcode failed: " + Error);
+
+    // Check the read result instead of dereferencing it unconditionally.
+    auto ImageOrErr = MemoryBuffer::getFileOrSTDIN(LinkerOutputFilePath);
+    if (!ImageOrErr)
+      return Fail(ImageOrErr.getError(), "Failed to read linker output file");
+
+    // The image is held by the buffer now; the files are no longer needed.
+    // NOTE(review): assumes removing a file that the buffer may have mapped is
+    // fine on the platforms this plugin supports - confirm.
+    (void)sys::fs::remove(LinkerInputFilePath);
+    (void)sys::fs::remove(LinkerOutputFilePath);
+    return std::move(*ImageOrErr);
+  }
+
+  /// Return a copy of the architecture name stored in `Arch`.
+  std::string getArch() const override {
+    return Arch;
+  }
+
   /// Allocate and construct an AMDGPU kernel.
   Expected<GenericKernelTy *>
   constructKernelEntry(const __tgt_offload_entry &KernelEntry,
@@ -2027,6 +2095,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
   /// The agent handler corresponding to the device.
   hsa_agent_t Agent;
 
+  /// The GPU architecture name, as reported by HSA_AGENT_INFO_NAME.
+  std::string Arch;
+
   /// Reference to the host device.
   AMDHostDeviceTy &HostDevice;
 
@@ -2255,6 +2326,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     return Plugin::check(Status, "Error in hsa_shut_down: %s");
   }
 
+  /// Report amdgcn as the triple architecture of this plugin.
+  Triple::ArchType getTripleArch() const override {
+    return Triple::amdgcn;
+  }
+
   /// Get the ELF code for recognizing the compatible image binary.
   uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }