[Mlir-commits] [mlir] [mlir][GPU] Remove the SerializeToCubin pass (PR #82486)
Fabian Mora
llvmlistbot at llvm.org
Wed Feb 21 04:20:47 PST 2024
https://github.com/fabianmcg created https://github.com/llvm/llvm-project/pull/82486
The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU compilation attributes.
This patch removes `SerializeToCubin` from the repo.
From 1c345bf40d5e10980a6900cdecbcbdbdd4c24f1a Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Wed, 21 Feb 2024 12:15:41 +0000
Subject: [PATCH] [mlir][GPU] Remove the SerializeToCubin pass
The `SerializeToCubin` pass was deprecated in September 2023 in favor of GPU
compilation attributes.
This patch removes `SerializeToCubin` from the repo.
---
mlir/CMakeLists.txt | 1 -
.../mlir/Dialect/GPU/Transforms/Passes.h | 14 --
mlir/lib/Dialect/GPU/CMakeLists.txt | 52 -----
.../GPU/Transforms/SerializeToCubin.cpp | 180 ------------------
4 files changed, 247 deletions(-)
delete mode 100644 mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 2d9f78e03ba76b..16c898bdeb6e00 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -123,7 +123,6 @@ else()
endif()
add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS})
-set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes")
set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
set(MLIR_ENABLE_SYCL_RUNNER 0 CACHE BOOL "Enable building the mlir Sycl runner")
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 5885facd07541e..8f7466a697d854 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -147,25 +147,11 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
// Registration
//===----------------------------------------------------------------------===//
-/// Register pass to serialize GPU kernel functions to a CUBIN binary
-/// annotation.
-LLVM_DEPRECATED("use Target attributes instead", "")
-void registerGpuSerializeToCubinPass();
-
/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.
LLVM_DEPRECATED("use Target attributes instead", "")
void registerGpuSerializeToHsacoPass();
-/// Create an instance of the GPU kernel function to CUBIN binary serialization
-/// pass with optLevel (default level 2).
-LLVM_DEPRECATED("use Target attributes instead", "")
-std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
- StringRef chip,
- StringRef features,
- int optLevel = 2,
- bool dumpPtx = false);
-
/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
LLVM_DEPRECATED("use Target attributes instead", "")
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index e5776e157b612c..51cfa2216e0c1f 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -1,11 +1,3 @@
-if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
- set(NVPTX_LIBS
- NVPTXCodeGen
- NVPTXDesc
- NVPTXInfo
- )
-endif()
-
if (MLIR_ENABLE_ROCM_CONVERSIONS)
set(AMDGPU_LIBS
IRReader
@@ -60,7 +52,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
Transforms/ParallelLoopMapper.cpp
Transforms/ROCDLAttachTarget.cpp
Transforms/SerializeToBlob.cpp
- Transforms/SerializeToCubin.cpp
Transforms/SerializeToHsaco.cpp
Transforms/ShuffleRewriter.cpp
Transforms/SPIRVAttachTarget.cpp
@@ -74,7 +65,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
Core
MC
Target
- ${NVPTX_LIBS}
${AMDGPU_LIBS}
DEPENDS
@@ -110,48 +100,6 @@ add_mlir_dialect_library(MLIRGPUTransforms
add_subdirectory(TransformOps)
add_subdirectory(Pipelines)
-if(MLIR_ENABLE_CUDA_RUNNER)
- if(NOT MLIR_ENABLE_CUDA_CONVERSIONS)
- message(SEND_ERROR
- "Building mlir with cuda support requires the NVPTX backend")
- endif()
-
- # Configure CUDA language support. Using check_language first allows us to
- # give a custom error message.
- include(CheckLanguage)
- check_language(CUDA)
- if (CMAKE_CUDA_COMPILER)
- enable_language(CUDA)
- else()
- message(SEND_ERROR
- "Building mlir with cuda support requires a working CUDA install")
- endif()
-
- # Enable gpu-to-cubin pass.
- target_compile_definitions(obj.MLIRGPUTransforms
- PRIVATE
- MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
- )
-
- # Add CUDA headers includes and the libcuda.so library.
- target_include_directories(obj.MLIRGPUTransforms
- PRIVATE
- ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
- )
-
- # Add link path for the cuda driver library.
- find_library(CUDA_DRIVER_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
- get_filename_component(CUDA_DRIVER_LIBRARY_PATH "${CUDA_DRIVER_LIBRARY}" DIRECTORY)
- target_link_directories(MLIRGPUTransforms PRIVATE ${CUDA_DRIVER_LIBRARY_PATH})
-
- target_link_libraries(MLIRGPUTransforms
- PRIVATE
- MLIRNVVMToLLVMIRTranslation
- cuda
- )
-
-endif()
-
if(MLIR_ENABLE_ROCM_CONVERSIONS)
if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
message(SEND_ERROR
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
deleted file mode 100644
index 34ad4e6868e157..00000000000000
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that serializes a gpu module into CUBIN blob and
-// adds that blob as a string attribute of the module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "llvm/Support/Debug.h"
-
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Threading.h"
-
-#include <cuda.h>
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
- CUresult result, Location loc) {
- const char *error = nullptr;
- cuGetErrorString(result, &error);
- emitError(loc,
- expr.concat(error ? " failed with error code " + llvm::Twine{error}
- : llvm::Twine(" failed with unknown error "))
- .concat("[")
- .concat(buffer)
- .concat("]"));
-}
-
-#define RETURN_ON_CUDA_ERROR(expr) \
- do { \
- if (auto status = (expr)) { \
- emitCudaError(#expr, jitErrorBuffer, status, loc); \
- return {}; \
- } \
- } while (false)
-
-namespace {
-class SerializeToCubinPass
- : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
- static llvm::once_flag initializeBackendOnce;
-
-public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)
-
- SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
- StringRef chip = "sm_35", StringRef features = "+ptx60",
- int optLevel = 2, bool dumpPtx = false);
-
- StringRef getArgument() const override { return "gpu-to-cubin"; }
- StringRef getDescription() const override {
- return "Lower GPU kernel function to CUBIN binary annotations";
- }
-
-private:
- // Serializes PTX to CUBIN.
- std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-// Sets the 'option' to 'value' unless it already has a value.
-static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
- if (!option.hasValue())
- option = value.str();
-}
-
-llvm::once_flag SerializeToCubinPass::initializeBackendOnce;
-
-SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
- StringRef features, int optLevel,
- bool dumpPtx) {
- // No matter how this pass is constructed, ensure that the NVPTX backend
- // is initialized exactly once.
- llvm::call_once(initializeBackendOnce, []() {
- // Initialize LLVM NVPTX backend.
-#if LLVM_HAS_NVPTX_TARGET
- LLVMInitializeNVPTXTarget();
- LLVMInitializeNVPTXTargetInfo();
- LLVMInitializeNVPTXTargetMC();
- LLVMInitializeNVPTXAsmPrinter();
-#endif
- });
-
- maybeSetOption(this->triple, triple);
- maybeSetOption(this->chip, chip);
- maybeSetOption(this->features, features);
- this->dumpPtx = dumpPtx;
- if (this->optLevel.getNumOccurrences() == 0)
- this->optLevel.setValue(optLevel);
-}
-
-std::unique_ptr<std::vector<char>>
-SerializeToCubinPass::serializeISA(const std::string &isa) {
- Location loc = getOperation().getLoc();
- char jitErrorBuffer[4096] = {0};
-
- RETURN_ON_CUDA_ERROR(cuInit(0));
-
- // Linking requires a device context.
- CUdevice device;
- RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
- CUcontext context;
- // Use the primary context.
- RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRetain(&context, device));
- // Push the primary context so that the next CUDA operations
- // actually use it.
- RETURN_ON_CUDA_ERROR(cuCtxPushCurrent(context));
- CUlinkState linkState;
-
- CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
- void *jitOptionsVals[] = {jitErrorBuffer,
- reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
- RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
- jitOptions, /* jit options */
- jitOptionsVals, /* jit option values */
- &linkState));
-
- auto kernelName = getOperation().getName().str();
- if (dumpPtx) {
- llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
- llvm::dbgs() << isa << "\n";
- }
- RETURN_ON_CUDA_ERROR(cuLinkAddData(
- linkState, CUjitInputType::CU_JIT_INPUT_PTX,
- const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
- kernelName.c_str(), 0, /* number of jit options */
- nullptr, /* jit options */
- nullptr /* jit option values */
- ));
-
- void *cubinData;
- size_t cubinSize;
- RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
- char *cubinAsChar = static_cast<char *>(cubinData);
- auto result =
- std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
- // This will also destroy the cubin data.
- RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
- // Pop and release the primary context.
- CUcontext poppedContext;
- RETURN_ON_CUDA_ERROR(cuCtxPopCurrent(&poppedContext));
- RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRelease(device));
-
- return result;
-}
-
-// Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
-void mlir::registerGpuSerializeToCubinPass() {
- PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
- [] { return std::make_unique<SerializeToCubinPass>(); });
-}
-
-std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
- StringRef arch,
- StringRef features,
- int optLevel,
- bool dumpPtx) {
- return std::make_unique<SerializeToCubinPass>(triple, arch, features,
- optLevel, dumpPtx);
-}
-
-#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
-void mlir::registerGpuSerializeToCubinPass() {}
-#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE
More information about the Mlir-commits
mailing list