[Mlir-commits] [mlir] [mlir][Target] Support Fatbin target for static nvptxcompiler (PR #118044)

Sun Dec 1 23:32:23 PST 2024

https://github.com/MikaOvO updated https://github.com/llvm/llvm-project/pull/118044

>From 869a5dd52dbf0877b2266ddae2effa91027be711 Mon Sep 17 00:00:00 2001
From: Zichen Lu <mikaovo2000 at gmail.com>
Date: Fri, 29 Nov 2024 11:02:05 +0800
Subject: [PATCH] [mlir][Target] Support Fatbin target for static nvptxcompiler

---
 mlir/CMakeLists.txt                  |  3 ++
 mlir/lib/Target/LLVM/CMakeLists.txt  | 16 +++++++++++
 mlir/lib/Target/LLVM/NVVM/Target.cpp | 41 ++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 2880dc30bca91f..e4a40b23fe3a35 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -135,6 +135,9 @@ set(MLIR_ENABLE_VULKAN_RUNNER 0 CACHE BOOL "Enable building the MLIR Vulkan runn
 set(MLIR_ENABLE_NVPTXCOMPILER 0 CACHE BOOL
     "Statically link the nvptxlibrary instead of calling ptxas as a subprocess \
     for compiling PTX to cubin")
+set(MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN 0 CACHE BOOL
+    "Statically link the nvfatbin library instead of calling fatbinary as a subprocess \
+    for compiling PTX to fatbin")
 
 set(MLIR_ENABLE_PDL_IN_PATTERNMATCH 1 CACHE BOOL "Enable PDL in PatternMatch")
 
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 422f7e5fa7caec..91e4776a3b6fbb 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -88,6 +88,22 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
       # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
       target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVPTXCOMPILER_LIB)
       target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
+
+      # Optionally link the static `nvfatbin` library (in-process fatbin creation).
+      if(MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN)
+        find_library(MLIR_NVFATBIN_LIB_PATH nvfatbin_static
+                    PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
+        # `find_library` leaves a `-NOTFOUND` value (false in `if`) on failure.
+        if(NOT MLIR_NVFATBIN_LIB_PATH)
+          message(FATAL_ERROR
+                  "`MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN` is enabled, which requires the \
+                  static `nvfatbin` library, but it couldn't be found.")
+        endif()
+
+        add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
+        set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION "${MLIR_NVFATBIN_LIB_PATH}")
+        target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
+      endif()
     endif()
   else()
     # Fail if `MLIR_ENABLE_NVPTXCOMPILER` is enabled and the toolkit couldn't be found.
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index bca26e3a0e84a9..8de92383ded629 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -473,6 +473,20 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
     }                                                                          \
   } while (false)
 
+#if MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN
+#include "nvFatbin.h"
+// On nvFatbin failure: report the error at `loc` and return `std::nullopt`.
+#define RETURN_ON_NVFATBIN_ERROR(expr)                                         \
+  do {                                                                         \
+    const nvFatbinResult status = (expr);                                      \
+    if (status != nvFatbinResult::NVFATBIN_SUCCESS) {                          \
+      emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ")      \
+                     << nvFatbinGetErrorString(status);                        \
+      return std::nullopt;                                                     \
+    }                                                                          \
+  } while (false)
+#endif // MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN
+
 std::optional<SmallVector<char, 0>>
 NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
   Location loc = getOperation().getLoc();
@@ -486,6 +500,9 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
       targetOptions.tokenizeCmdOptions();
   cmdOpts.second.append(
       {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
+  bool useFatbin32 = llvm::any_of(cmdOpts.second, [](const char *option) {
+    return llvm::StringRef(option) == "-32";
+  });
 
   // Create the compiler handle.
   RETURN_ON_NVPTXCOMPILER_ERROR(
@@ -538,6 +555,30 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
   });
 #undef DEBUG_TYPE
   RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
+
+#if MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN
+  if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
+    const char *cubinOpts[1] = {useFatbin32 ? "-32" : "-64"};
+    nvFatbinHandle handle;
+
+    auto chip = getTarget().getChip();
+    chip.consume_front("sm_");
+
+    RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
+    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
+        handle, binary.data(), binary.size(), chip.data(), nullptr));
+    RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
+        handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
+
+    size_t fatbinSize;
+    RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
+    SmallVector<char, 0> fatbin(fatbinSize, 0);
+    RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
+    RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
+    return fatbin;
+  }
+#endif // MLIR_ENABLE_NVPTXCOMPILER_NVFATBIN
+
   return binary;
 }
 #endif // MLIR_ENABLE_NVPTXCOMPILER