[Mlir-commits] [mlir] [mlir][Target] Support Fatbin target for static nvptxcompiler (PR #118044)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Thu Nov 28 19:12:51 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Zichen Lu (MikaOvO)
<details>
<summary>Changes</summary>
In `lib/Target/LLVM/NVVM/Target.cpp`, `NVPTXSerializer` compiles PTX to binary using one of two different flows, selected by `MLIR_ENABLE_NVPTXCOMPILER`.
When MLIR is built with `-DMLIR_ENABLE_NVPTXCOMPILER=ON`, the flow does not check whether the target is `gpu::CompilationTarget::Fatbin` and compiles the PTX directly to a cubin, which is inconsistent with the other flow.
I have tested it locally: the two flows return the same Fatbin result when given the same `GpuModule`.
---
Full diff: https://github.com/llvm/llvm-project/pull/118044.diff
2 Files Affected:
- (modified) mlir/lib/Target/LLVM/CMakeLists.txt (+14-2)
- (modified) mlir/lib/Target/LLVM/NVVM/Target.cpp (+41)
``````````diff
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 422f7e5fa7caec..4843377f1797fa 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -66,12 +66,14 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
set(MLIR_CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_ROOT})
endif()
- # Add the `nvptxcompiler` library.
+ # Add the `nvptxcompiler` library and the `nvfatbin` library.
if(MLIR_ENABLE_NVPTXCOMPILER)
# Find the `nvptxcompiler` library.
# TODO: Bump the MLIR CMake version to 3.25 and use `CUDA::nvptxcompiler_static`.
find_library(MLIR_NVPTXCOMPILER_LIB_PATH nvptxcompiler_static
PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
+ find_library(MLIR_NVFATBIN_LIB_PATH nvfatbin_static
+ PATHS ${CUDAToolkit_LIBRARY_DIR} NO_DEFAULT_PATH)
# Fail if `nvptxcompiler_static` couldn't be found.
if(MLIR_NVPTXCOMPILER_LIB_PATH STREQUAL "MLIR_NVPTXCOMPILER_LIB_PATH-NOTFOUND")
@@ -79,14 +81,24 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
"Requested using the `nvptxcompiler` library backend but it couldn't be found.")
endif()
+ # Fail if `nvfatbin_static` couldn't be found.
+ if(MLIR_NVFATBIN_LIB_PATH STREQUAL "MLIR_NVFATBIN_LIB_PATH-NOTFOUND")
+ message(FATAL_ERROR
+ "Requested using the `nvfatbin` library backend but it couldn't be found.")
+ endif()
+
add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
+ add_library(MLIR_NVFATBIN_LIB STATIC IMPORTED GLOBAL)
# Downstream projects can modify this path and use it in CMake. For example:
# add_library(MLIR_NVPTXCOMPILER_LIB STATIC IMPORTED GLOBAL)
# set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${...})
# where `...` is to be replaced with the path to the library.
set_property(TARGET MLIR_NVPTXCOMPILER_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVPTXCOMPILER_LIB_PATH})
- # Link against `nvptxcompiler_static`. TODO: use `CUDA::nvptxcompiler_static`.
+ set_property(TARGET MLIR_NVFATBIN_LIB PROPERTY IMPORTED_LOCATION ${MLIR_NVFATBIN_LIB_PATH})
+ # Link against `nvptxcompiler_static` and `nvfatbin_static`.
+ # TODO: use `CUDA::nvptxcompiler_static` and `CUDA::nvfatbin_static`.
target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVPTXCOMPILER_LIB)
+ target_link_libraries(MLIRNVVMTarget PRIVATE MLIR_NVFATBIN_LIB)
target_include_directories(obj.MLIRNVVMTarget PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
endif()
else()
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index bca26e3a0e84a9..1c5aa24dffc9d0 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -473,6 +473,18 @@ NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
} \
} while (false)
+#include <nvFatbin.h>
+
+#define RETURN_ON_NVFATBIN_ERROR(expr) \
+ do { \
+ auto result = (expr); \
+ if (result != nvFatbinResult::NVFATBIN_SUCCESS) { \
+ emitError(loc) << llvm::Twine(#expr).concat(" failed with error: ") \
+ << nvFatbinGetErrorString(result); \
+ return std::nullopt; \
+ } \
+ } while (false)
+
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
Location loc = getOperation().getLoc();
@@ -486,6 +498,11 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
targetOptions.tokenizeCmdOptions();
cmdOpts.second.append(
{"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});
+ bool useFatbin32 = false;
+ for (const char *option : cmdOpts.second) {
+ if (StringRef(option) == "-32")
+ useFatbin32 = true;
+ }
// Create the compiler handle.
RETURN_ON_NVPTXCOMPILER_ERROR(
@@ -538,6 +555,30 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
});
#undef DEBUG_TYPE
RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
+
+ if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin) {
+ const char *cubinOpts[1] = {"-64"};
+ if (useFatbin32) {
+ cubinOpts[0] = {"-32"};
+ }
+ nvFatbinHandle handle;
+
+ auto chip = getTarget().getChip();
+ chip.consume_front("sm_");
+
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinCreate(&handle, cubinOpts, 1));
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinAddCubin(
+ handle, binary.data(), binary.size(), chip.data(), nullptr));
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinAddPTX(
+ handle, ptxCode.data(), ptxCode.size(), chip.data(), nullptr, nullptr));
+
+ size_t fatbinSize;
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinSize(handle, &fatbinSize));
+ SmallVector<char, 0> fatbin(fatbinSize, 0);
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinGet(handle, (void *)fatbin.data()));
+ RETURN_ON_NVFATBIN_ERROR(nvFatbinDestroy(&handle));
+ return fatbin;
+ }
return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
``````````
</details>
https://github.com/llvm/llvm-project/pull/118044
More information about the Mlir-commits
mailing list