[Mlir-commits] [mlir] 1828deb - [mlir][gpu] Deprecate gpu::Serialization* passes. (#65857)

Mon Sep 11 13:32:19 PDT 2023

Author: Fabian Mora
Date: 2023-09-11T16:32:15-04:00
New Revision: 1828deb7524f8d371825dfa6bd39cc17b7247e54

URL: https://github.com/llvm/llvm-project/commit/1828deb7524f8d371825dfa6bd39cc17b7247e54
DIFF: https://github.com/llvm/llvm-project/commit/1828deb7524f8d371825dfa6bd39cc17b7247e54.diff

LOG: [mlir][gpu] Deprecate gpu::Serialization* passes. (#65857)

Deprecate the `gpu-to-cubin` & `gpu-to-hsaco` passes in favor of the
`TargetAttr` workflow. This patch removes remaining upstream uses of the
aforementioned passes, including the option to use them in `mlir-opt`. A
future patch will remove these passes entirely.

The passes can be re-enabled in `mlir-opt` by adding the CMake flag: `-DMLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION=1`.

Added: 
    

Modified: 
    mlir/CMakeLists.txt
    mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
    mlir/include/mlir/InitAllPasses.h
    mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
    mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
    mlir/test/lib/Dialect/GPU/CMakeLists.txt
    mlir/tools/mlir-opt/CMakeLists.txt
    mlir/tools/mlir-opt/mlir-opt.cpp

Removed: 
    mlir/test/Conversion/GPUToCUDA/lit.local.cfg
    mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
    mlir/test/Conversion/GPUToROCm/lit.local.cfg
    mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
    mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
    mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp


################################################################################
diff  --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index fa4f6e76f985fb5..bbbcb0703f20f42 100644

--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -114,6 +114,7 @@ else()
 endif()
 add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS})
 
+set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes")
 set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner")
 set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner")
 set(MLIR_ENABLE_SPIRV_CPU_RUNNER 0 CACHE BOOL "Enable building the mlir SPIR-V cpu runner")

diff  --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 033e8755501f967..2a891a7d24f809a 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -134,14 +134,17 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
 
 /// Register pass to serialize GPU kernel functions to a CUBIN binary
 /// annotation.
+LLVM_DEPRECATED("use Target attributes instead", "")
 void registerGpuSerializeToCubinPass();
 
 /// Register pass to serialize GPU kernel functions to a HSAco binary
 /// annotation.
+LLVM_DEPRECATED("use Target attributes instead", "")
 void registerGpuSerializeToHsacoPass();
 
 /// Create an instance of the GPU kernel function to CUBIN binary serialization
 /// pass with optLevel (default level 2).
+LLVM_DEPRECATED("use Target attributes instead", "")
 std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
                                                     StringRef chip,
                                                     StringRef features,
@@ -150,6 +153,7 @@ std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
 
 /// Create an instance of the GPU kernel function to HSAco binary serialization
 /// pass.
+LLVM_DEPRECATED("use Target attributes instead", "")
 std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
                                                     StringRef arch,
                                                     StringRef features,

diff  --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 8f3f92ae43145d1..f7271737c66d1cb 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -65,8 +65,6 @@ inline void registerAllPasses() {
   bufferization::registerBufferizationPasses();
   func::registerFuncPasses();
   registerGPUPasses();
-  registerGpuSerializeToCubinPass();
-  registerGpuSerializeToHsacoPass();
   registerLinalgPasses();
   registerNVGPUPasses();
   registerSparseTensorPasses();

diff  --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
index 3cf530abd744e8e..234a0d82babef67 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
@@ -27,11 +27,3 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines
   MLIRVectorToLLVM
   MLIRVectorTransforms
 )
-
-if(MLIR_ENABLE_CUDA_RUNNER)
-  # Enable gpu-to-cubin pass.
-  target_compile_definitions(obj.MLIRSparseTensorPipelines
-    PRIVATE
-    MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
-  )
-endif()

diff  --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 24c4c4c43a93dea..37f9e09d34c04e7 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -78,11 +78,13 @@ void mlir::sparse_tensor::buildSparseCompiler(
 
   // Finalize GPU code generation.
   if (gpuCodegen) {
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
-    pm.addNestedPass<gpu::GPUModuleOp>(createGpuSerializeToCubinPass(
-        options.gpuTriple, options.gpuChip, options.gpuFeatures));
-#endif
+    GpuNVVMAttachTargetOptions nvvmTargetOptions;
+    nvvmTargetOptions.triple = options.gpuTriple;
+    nvvmTargetOptions.chip = options.gpuChip;
+    nvvmTargetOptions.features = options.gpuFeatures;
+    pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
     pm.addPass(createGpuToLLVMConversionPass());
+    pm.addPass(createGpuModuleToBinaryPass());
   }
 
   pm.addPass(createReconcileUnrealizedCastsPass());

diff  --git a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg b/mlir/test/Conversion/GPUToCUDA/lit.local.cfg
deleted file mode 100644
index bc470ccc5733a96..000000000000000
--- a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not config.run_cuda_tests:
-    config.unsupported = True

diff  --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
deleted file mode 100644
index 0a2ac552a7c6db1..000000000000000
--- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s
-
-// CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"}
-gpu.module @foo {
-  llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr)
-    // CHECK: attributes  {gpu.kernel}
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}
-
-// CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"}
-gpu.module @bar {
-  // CHECK: func @kernel_a
-  llvm.func @kernel_a()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-
-  // CHECK: func @kernel_b
-  llvm.func @kernel_b()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}

diff  --git a/mlir/test/Conversion/GPUToROCm/lit.local.cfg b/mlir/test/Conversion/GPUToROCm/lit.local.cfg
deleted file mode 100644
index 2f5cc9f3bad9737..000000000000000
--- a/mlir/test/Conversion/GPUToROCm/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not config.run_rocm_tests:
-    config.unsupported = True

diff  --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
deleted file mode 100644
index 8e27de4b60de741..000000000000000
--- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s
-
-// CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"}
-gpu.module @foo {
-  llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr)
-    // CHECK: attributes  {gpu.kernel}
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}
-
-// CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"}
-gpu.module @bar {
-  // CHECK: func @kernel_a
-  llvm.func @kernel_a()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-
-  // CHECK: func @kernel_b
-  llvm.func @kernel_b()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}

diff  --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
index ac96229e80a077e..80edd04b691a571 100644
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@@ -31,8 +31,6 @@ set(LIBS
   )
 
 add_mlir_library(MLIRGPUTestPasses
-  TestConvertGPUKernelToCubin.cpp
-  TestConvertGPUKernelToHsaco.cpp
   TestGpuMemoryPromotion.cpp
   TestGpuRewrite.cpp
   TestLowerToNVVM.cpp
@@ -43,12 +41,3 @@ add_mlir_library(MLIRGPUTestPasses
   ${LIBS}
   )
 
-# This is how it is defined in mlir/lib/Dialect/GPU/CMakeLists.txt
-# We probably want something better project-wide
-if(MLIR_ENABLE_CUDA_RUNNER)
-  # Enable gpu-to-cubin pass.
-  target_compile_definitions(MLIRGPUTestPasses
-    PRIVATE
-    MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
-  )
-endif()

diff  --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
deleted file mode 100644
index 1c442b0147c8b30..000000000000000
--- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===- TestConvertGPUKernelToCubin.cpp - Test gpu kernel cubin lowering ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-#if MLIR_CUDA_CONVERSIONS_ENABLED
-namespace {
-class TestSerializeToCubinPass
-    : public PassWrapper<TestSerializeToCubinPass, gpu::SerializeToBlobPass> {
-public:
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToCubinPass)
-
-  StringRef getArgument() const final { return "test-gpu-to-cubin"; }
-  StringRef getDescription() const final {
-    return "Lower GPU kernel function to CUBIN binary annotations";
-  }
-  TestSerializeToCubinPass();
-
-private:
-  void getDependentDialects(DialectRegistry &registry) const override;
-
-  // Serializes PTX to CUBIN.
-  std::unique_ptr<std::vector<char>>
-  serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-TestSerializeToCubinPass::TestSerializeToCubinPass() {
-  this->triple = "nvptx64-nvidia-cuda";
-  this->chip = "sm_35";
-  this->features = "+ptx60";
-}
-
-void TestSerializeToCubinPass::getDependentDialects(
-    DialectRegistry &registry) const {
-  registerNVVMDialectTranslation(registry);
-  gpu::SerializeToBlobPass::getDependentDialects(registry);
-}
-
-std::unique_ptr<std::vector<char>>
-TestSerializeToCubinPass::serializeISA(const std::string &) {
-  std::string data = "CUBIN";
-  return std::make_unique<std::vector<char>>(data.begin(), data.end());
-}
-
-namespace mlir {
-namespace test {
-// Register test pass to serialize GPU module to a CUBIN binary annotation.
-void registerTestGpuSerializeToCubinPass() {
-  PassRegistration<TestSerializeToCubinPass>([] {
-    // Initialize LLVM NVPTX backend.
-    LLVMInitializeNVPTXTarget();
-    LLVMInitializeNVPTXTargetInfo();
-    LLVMInitializeNVPTXTargetMC();
-    LLVMInitializeNVPTXAsmPrinter();
-
-    return std::make_unique<TestSerializeToCubinPass>();
-  });
-}
-} // namespace test
-} // namespace mlir
-#endif // MLIR_CUDA_CONVERSIONS_ENABLED

diff  --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
deleted file mode 100644
index c204e86632ac920..000000000000000
--- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-//===- TestConvertGPUKernelToHsaco.cpp - Test gpu kernel hsaco lowering ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-#if MLIR_ROCM_CONVERSIONS_ENABLED
-namespace {
-class TestSerializeToHsacoPass
-    : public PassWrapper<TestSerializeToHsacoPass, gpu::SerializeToBlobPass> {
-public:
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToHsacoPass)
-
-  StringRef getArgument() const final { return "test-gpu-to-hsaco"; }
-  StringRef getDescription() const final {
-    return "Lower GPU kernel function to HSAco binary annotations";
-  }
-  TestSerializeToHsacoPass();
-
-private:
-  void getDependentDialects(DialectRegistry &registry) const override;
-
-  // Serializes ROCDL IR to HSACO.
-  std::unique_ptr<std::vector<char>>
-  serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-TestSerializeToHsacoPass::TestSerializeToHsacoPass() {
-  this->triple = "amdgcn-amd-amdhsa";
-  this->chip = "gfx900";
-}
-
-void TestSerializeToHsacoPass::getDependentDialects(
-    DialectRegistry &registry) const {
-  registerROCDLDialectTranslation(registry);
-  gpu::SerializeToBlobPass::getDependentDialects(registry);
-}
-
-std::unique_ptr<std::vector<char>>
-TestSerializeToHsacoPass::serializeISA(const std::string &) {
-  std::string data = "HSACO";
-  return std::make_unique<std::vector<char>>(data.begin(), data.end());
-}
-
-namespace mlir {
-namespace test {
-// Register test pass to serialize GPU module to a HSAco binary annotation.
-void registerTestGpuSerializeToHsacoPass() {
-  PassRegistration<TestSerializeToHsacoPass>([] {
-    // Initialize LLVM AMDGPU backend.
-    LLVMInitializeAMDGPUTarget();
-    LLVMInitializeAMDGPUTargetInfo();
-    LLVMInitializeAMDGPUTargetMC();
-    LLVMInitializeAMDGPUAsmPrinter();
-
-    return std::make_unique<TestSerializeToHsacoPass>();
-  });
-}
-} // namespace test
-} // namespace mlir
-#endif // MLIR_ROCM_CONVERSIONS_ENABLED

diff  --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt
index df1e9480c754bcc..88a0562cb6e7207 100644
--- a/mlir/tools/mlir-opt/CMakeLists.txt
+++ b/mlir/tools/mlir-opt/CMakeLists.txt
@@ -91,3 +91,11 @@ llvm_update_compile_flags(mlir-opt)
 
 mlir_check_all_link_libraries(mlir-opt)
 export_executable_symbols_for_plugins(mlir-opt)
+
+if(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION)
+  # Enable deprecated serialization passes.
+  target_compile_definitions(mlir-opt
+    PRIVATE
+    MLIR_DEPRECATED_GPU_SERIALIZATION_ENABLE=1
+  )
+endif()

diff  --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index a8aeffec1ae72d0..b7647d7de78a10e 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -80,8 +80,6 @@ void registerTestCallGraphPass();
 void registerTestCfAssertPass();
 void registerTestConstantFold();
 void registerTestControlFlowSink();
-void registerTestGpuSerializeToCubinPass();
-void registerTestGpuSerializeToHsacoPass();
 void registerTestDataLayoutPropagation();
 void registerTestDataLayoutQuery();
 void registerTestDeadCodeAnalysisPass();
@@ -204,11 +202,7 @@ void registerTestPasses() {
   mlir::test::registerTestDiagnosticsPass();
   mlir::test::registerTestDialectConversionPasses();
 #if MLIR_CUDA_CONVERSIONS_ENABLED
-  mlir::test::registerTestGpuSerializeToCubinPass();
   mlir::test::registerTestLowerToNVVM();
-#endif
-#if MLIR_ROCM_CONVERSIONS_ENABLED
-  mlir::test::registerTestGpuSerializeToHsacoPass();
 #endif
   mlir::test::registerTestDecomposeCallGraphTypes();
   mlir::test::registerTestDataLayoutPropagation();
@@ -270,6 +264,10 @@ void registerTestPasses() {
 
 int main(int argc, char **argv) {
   registerAllPasses();
+#if MLIR_DEPRECATED_GPU_SERIALIZATION_ENABLE == 1
+  registerGpuSerializeToCubinPass();
+  registerGpuSerializeToHsacoPass();
+#endif
 #ifdef MLIR_INCLUDE_TESTS
   registerTestPasses();
 #endif