[Mlir-commits] [mlir] [mlir][gpu] Deprecate gpu::Serialziation* passes. (PR #65857)
Fabian Mora
llvmlistbot at llvm.org
Sat Sep 9 11:30:58 PDT 2023
https://github.com/fabianmcg created https://github.com/llvm/llvm-project/pull/65857:
Deprecate the `gpu-to-cubin` & `gpu-to-hsaco` passes in favor of the `TargetAttr` workflow. This patch removes remaining upstream uses of the aforementioned passes, including the option to use them in `mlir-opt`. A future patch will remove these passes entirely.
**NOTE:**
1. When testing on an NVIDIA A100 the test `Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir` failed with:
```
'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'
```
However, the test failed even without the switch to the new workflow; if someone else could test and verify this, it would be appreciated. All other tests succeeded, including `CUDA_SM80_LT_TESTS`.
2. The [SM_90 integration tests](https://github.com/llvm/llvm-project/tree/main/mlir/test/Integration/GPU/CUDA/sm90) still need to be ported into the new workflow, so this patch is dependent on that porting.
From 06178712668fe368f83d06d233710dbfd83c49ba Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Fri, 8 Sep 2023 22:04:58 +0000
Subject: [PATCH] [mlir][gpu] Deprecate gpu::Serialziation* passes.
Deprecate the `gpu-to-cubin` & `gpu-to-hsaco` passes in favor of the `TargetAttr`
workflow. This patch removes remaining upstream uses of the aforementioned passes,
including the option to use them in `mlir-opt`. A future patch will remove these
passes entirely.
---
.../mlir/Dialect/GPU/Transforms/Passes.h | 4 +
mlir/include/mlir/InitAllPasses.h | 2 -
.../SparseTensor/Pipelines/CMakeLists.txt | 8 --
.../Pipelines/SparseTensorPipelines.cpp | 10 ++-
mlir/test/Conversion/GPUToCUDA/lit.local.cfg | 2 -
.../GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir | 25 -------
mlir/test/Conversion/GPUToROCm/lit.local.cfg | 2 -
.../lower-rocdl-kernel-to-hsaco.mlir | 25 -------
mlir/test/lib/Dialect/GPU/CMakeLists.txt | 11 ---
.../GPU/TestConvertGPUKernelToCubin.cpp | 73 -------------------
.../GPU/TestConvertGPUKernelToHsaco.cpp | 72 ------------------
mlir/tools/mlir-opt/mlir-opt.cpp | 6 --
12 files changed, 10 insertions(+), 230 deletions(-)
delete mode 100644 mlir/test/Conversion/GPUToCUDA/lit.local.cfg
delete mode 100644 mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
delete mode 100644 mlir/test/Conversion/GPUToROCm/lit.local.cfg
delete mode 100644 mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
delete mode 100644 mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
delete mode 100644 mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 033e8755501f967..2a891a7d24f809a 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -134,14 +134,17 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
/// Register pass to serialize GPU kernel functions to a CUBIN binary
/// annotation.
+LLVM_DEPRECATED("use Target attributes instead", "")
void registerGpuSerializeToCubinPass();
/// Register pass to serialize GPU kernel functions to a HSAco binary
/// annotation.
+LLVM_DEPRECATED("use Target attributes instead", "")
void registerGpuSerializeToHsacoPass();
/// Create an instance of the GPU kernel function to CUBIN binary serialization
/// pass with optLevel (default level 2).
+LLVM_DEPRECATED("use Target attributes instead", "")
std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
StringRef chip,
StringRef features,
@@ -150,6 +153,7 @@ std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
+LLVM_DEPRECATED("use Target attributes instead", "")
std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
StringRef arch,
StringRef features,
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 8f3f92ae43145d1..f7271737c66d1cb 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -65,8 +65,6 @@ inline void registerAllPasses() {
bufferization::registerBufferizationPasses();
func::registerFuncPasses();
registerGPUPasses();
- registerGpuSerializeToCubinPass();
- registerGpuSerializeToHsacoPass();
registerLinalgPasses();
registerNVGPUPasses();
registerSparseTensorPasses();
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
index 3cf530abd744e8e..234a0d82babef67 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
@@ -27,11 +27,3 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines
MLIRVectorToLLVM
MLIRVectorTransforms
)
-
-if(MLIR_ENABLE_CUDA_RUNNER)
- # Enable gpu-to-cubin pass.
- target_compile_definitions(obj.MLIRSparseTensorPipelines
- PRIVATE
- MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
- )
-endif()
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 24c4c4c43a93dea..37f9e09d34c04e7 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -78,11 +78,13 @@ void mlir::sparse_tensor::buildSparseCompiler(
// Finalize GPU code generation.
if (gpuCodegen) {
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
- pm.addNestedPass<gpu::GPUModuleOp>(createGpuSerializeToCubinPass(
- options.gpuTriple, options.gpuChip, options.gpuFeatures));
-#endif
+ GpuNVVMAttachTargetOptions nvvmTargetOptions;
+ nvvmTargetOptions.triple = options.gpuTriple;
+ nvvmTargetOptions.chip = options.gpuChip;
+ nvvmTargetOptions.features = options.gpuFeatures;
+ pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
pm.addPass(createGpuToLLVMConversionPass());
+ pm.addPass(createGpuModuleToBinaryPass());
}
pm.addPass(createReconcileUnrealizedCastsPass());
diff --git a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg b/mlir/test/Conversion/GPUToCUDA/lit.local.cfg
deleted file mode 100644
index bc470ccc5733a96..000000000000000
--- a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not config.run_cuda_tests:
- config.unsupported = True
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
deleted file mode 100644
index 0a2ac552a7c6db1..000000000000000
--- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s
-
-// CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"}
-gpu.module @foo {
- llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr)
- // CHECK: attributes {gpu.kernel}
- attributes { gpu.kernel } {
- llvm.return
- }
-}
-
-// CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"}
-gpu.module @bar {
- // CHECK: func @kernel_a
- llvm.func @kernel_a()
- attributes { gpu.kernel } {
- llvm.return
- }
-
- // CHECK: func @kernel_b
- llvm.func @kernel_b()
- attributes { gpu.kernel } {
- llvm.return
- }
-}
diff --git a/mlir/test/Conversion/GPUToROCm/lit.local.cfg b/mlir/test/Conversion/GPUToROCm/lit.local.cfg
deleted file mode 100644
index 2f5cc9f3bad9737..000000000000000
--- a/mlir/test/Conversion/GPUToROCm/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not config.run_rocm_tests:
- config.unsupported = True
diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
deleted file mode 100644
index 8e27de4b60de741..000000000000000
--- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s
-
-// CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"}
-gpu.module @foo {
- llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr)
- // CHECK: attributes {gpu.kernel}
- attributes { gpu.kernel } {
- llvm.return
- }
-}
-
-// CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"}
-gpu.module @bar {
- // CHECK: func @kernel_a
- llvm.func @kernel_a()
- attributes { gpu.kernel } {
- llvm.return
- }
-
- // CHECK: func @kernel_b
- llvm.func @kernel_b()
- attributes { gpu.kernel } {
- llvm.return
- }
-}
diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
index ac96229e80a077e..80edd04b691a571 100644
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@@ -31,8 +31,6 @@ set(LIBS
)
add_mlir_library(MLIRGPUTestPasses
- TestConvertGPUKernelToCubin.cpp
- TestConvertGPUKernelToHsaco.cpp
TestGpuMemoryPromotion.cpp
TestGpuRewrite.cpp
TestLowerToNVVM.cpp
@@ -43,12 +41,3 @@ add_mlir_library(MLIRGPUTestPasses
${LIBS}
)
-# This is how it is defined in mlir/lib/Dialect/GPU/CMakeLists.txt
-# We probably want something better project-wide
-if(MLIR_ENABLE_CUDA_RUNNER)
- # Enable gpu-to-cubin pass.
- target_compile_definitions(MLIRGPUTestPasses
- PRIVATE
- MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
- )
-endif()
diff --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
deleted file mode 100644
index 1c442b0147c8b30..000000000000000
--- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===- TestConvertGPUKernelToCubin.cpp - Test gpu kernel cubin lowering ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-#if MLIR_CUDA_CONVERSIONS_ENABLED
-namespace {
-class TestSerializeToCubinPass
- : public PassWrapper<TestSerializeToCubinPass, gpu::SerializeToBlobPass> {
-public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToCubinPass)
-
- StringRef getArgument() const final { return "test-gpu-to-cubin"; }
- StringRef getDescription() const final {
- return "Lower GPU kernel function to CUBIN binary annotations";
- }
- TestSerializeToCubinPass();
-
-private:
- void getDependentDialects(DialectRegistry &registry) const override;
-
- // Serializes PTX to CUBIN.
- std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-TestSerializeToCubinPass::TestSerializeToCubinPass() {
- this->triple = "nvptx64-nvidia-cuda";
- this->chip = "sm_35";
- this->features = "+ptx60";
-}
-
-void TestSerializeToCubinPass::getDependentDialects(
- DialectRegistry &registry) const {
- registerNVVMDialectTranslation(registry);
- gpu::SerializeToBlobPass::getDependentDialects(registry);
-}
-
-std::unique_ptr<std::vector<char>>
-TestSerializeToCubinPass::serializeISA(const std::string &) {
- std::string data = "CUBIN";
- return std::make_unique<std::vector<char>>(data.begin(), data.end());
-}
-
-namespace mlir {
-namespace test {
-// Register test pass to serialize GPU module to a CUBIN binary annotation.
-void registerTestGpuSerializeToCubinPass() {
- PassRegistration<TestSerializeToCubinPass>([] {
- // Initialize LLVM NVPTX backend.
- LLVMInitializeNVPTXTarget();
- LLVMInitializeNVPTXTargetInfo();
- LLVMInitializeNVPTXTargetMC();
- LLVMInitializeNVPTXAsmPrinter();
-
- return std::make_unique<TestSerializeToCubinPass>();
- });
-}
-} // namespace test
-} // namespace mlir
-#endif // MLIR_CUDA_CONVERSIONS_ENABLED
diff --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
deleted file mode 100644
index c204e86632ac920..000000000000000
--- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-//===- TestConvertGPUKernelToHsaco.cpp - Test gpu kernel hsaco lowering ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-#if MLIR_ROCM_CONVERSIONS_ENABLED
-namespace {
-class TestSerializeToHsacoPass
- : public PassWrapper<TestSerializeToHsacoPass, gpu::SerializeToBlobPass> {
-public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToHsacoPass)
-
- StringRef getArgument() const final { return "test-gpu-to-hsaco"; }
- StringRef getDescription() const final {
- return "Lower GPU kernel function to HSAco binary annotations";
- }
- TestSerializeToHsacoPass();
-
-private:
- void getDependentDialects(DialectRegistry &registry) const override;
-
- // Serializes ROCDL IR to HSACO.
- std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-TestSerializeToHsacoPass::TestSerializeToHsacoPass() {
- this->triple = "amdgcn-amd-amdhsa";
- this->chip = "gfx900";
-}
-
-void TestSerializeToHsacoPass::getDependentDialects(
- DialectRegistry &registry) const {
- registerROCDLDialectTranslation(registry);
- gpu::SerializeToBlobPass::getDependentDialects(registry);
-}
-
-std::unique_ptr<std::vector<char>>
-TestSerializeToHsacoPass::serializeISA(const std::string &) {
- std::string data = "HSACO";
- return std::make_unique<std::vector<char>>(data.begin(), data.end());
-}
-
-namespace mlir {
-namespace test {
-// Register test pass to serialize GPU module to a HSAco binary annotation.
-void registerTestGpuSerializeToHsacoPass() {
- PassRegistration<TestSerializeToHsacoPass>([] {
- // Initialize LLVM AMDGPU backend.
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTargetMC();
- LLVMInitializeAMDGPUAsmPrinter();
-
- return std::make_unique<TestSerializeToHsacoPass>();
- });
-}
-} // namespace test
-} // namespace mlir
-#endif // MLIR_ROCM_CONVERSIONS_ENABLED
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index a8aeffec1ae72d0..22eca9bcff6ff27 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -80,8 +80,6 @@ void registerTestCallGraphPass();
void registerTestCfAssertPass();
void registerTestConstantFold();
void registerTestControlFlowSink();
-void registerTestGpuSerializeToCubinPass();
-void registerTestGpuSerializeToHsacoPass();
void registerTestDataLayoutPropagation();
void registerTestDataLayoutQuery();
void registerTestDeadCodeAnalysisPass();
@@ -204,11 +202,7 @@ void registerTestPasses() {
mlir::test::registerTestDiagnosticsPass();
mlir::test::registerTestDialectConversionPasses();
#if MLIR_CUDA_CONVERSIONS_ENABLED
- mlir::test::registerTestGpuSerializeToCubinPass();
mlir::test::registerTestLowerToNVVM();
-#endif
-#if MLIR_ROCM_CONVERSIONS_ENABLED
- mlir::test::registerTestGpuSerializeToHsacoPass();
#endif
mlir::test::registerTestDecomposeCallGraphTypes();
mlir::test::registerTestDataLayoutPropagation();
More information about the Mlir-commits
mailing list