[Mlir-commits] [mlir] [mlir][gpu] Deprecate gpu::Serialization* passes. (PR #65857)

Sat Sep 9 11:30:58 PDT 2023

https://github.com/fabianmcg created https://github.com/llvm/llvm-project/pull/65857:

Deprecate the `gpu-to-cubin` & `gpu-to-hsaco` passes in favor of the `TargetAttr` workflow. This patch removes remaining upstream uses of the aforementioned passes, including the option to use them in `mlir-opt`. A future patch will remove these passes entirely.

**NOTE:**
  1. When testing on an NVIDIA A100, the test `Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir` failed with:
```
'cuMemAlloc(&ptr, sizeBytes)' failed with 'CUDA_ERROR_INVALID_VALUE'
```
  However, the test failed even without the switch to the new workflow; if someone else could test and verify this, it would be appreciated. All other tests succeeded, including `CUDA_SM80_LT_TESTS`.
  2. The [SM_90 integration tests](https://github.com/llvm/llvm-project/tree/main/mlir/test/Integration/GPU/CUDA/sm90) still need to be ported to the new workflow, so this patch depends on that port landing first.

>From 06178712668fe368f83d06d233710dbfd83c49ba Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Fri, 8 Sep 2023 22:04:58 +0000
Subject: [PATCH] [mlir][gpu] Deprecate gpu::Serialization* passes.

Deprecate the `gpu-to-cubin` & `gpu-to-hsaco` passes in favor of the `TargetAttr`
workflow. This patch removes remaining upstream uses of the aforementioned passes,
including the option to use them in `mlir-opt`. A future patch will remove these
passes entirely.
---
 .../mlir/Dialect/GPU/Transforms/Passes.h      |  4 +
 mlir/include/mlir/InitAllPasses.h             |  2 -
 .../SparseTensor/Pipelines/CMakeLists.txt     |  8 --
 .../Pipelines/SparseTensorPipelines.cpp       | 10 ++-
 mlir/test/Conversion/GPUToCUDA/lit.local.cfg  |  2 -
 .../GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir | 25 -------
 mlir/test/Conversion/GPUToROCm/lit.local.cfg  |  2 -
 .../lower-rocdl-kernel-to-hsaco.mlir          | 25 -------
 mlir/test/lib/Dialect/GPU/CMakeLists.txt      | 11 ---
 .../GPU/TestConvertGPUKernelToCubin.cpp       | 73 -------------------
 .../GPU/TestConvertGPUKernelToHsaco.cpp       | 72 ------------------
 mlir/tools/mlir-opt/mlir-opt.cpp              |  6 --
 12 files changed, 10 insertions(+), 230 deletions(-)
 delete mode 100644 mlir/test/Conversion/GPUToCUDA/lit.local.cfg
 delete mode 100644 mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
 delete mode 100644 mlir/test/Conversion/GPUToROCm/lit.local.cfg
 delete mode 100644 mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
 delete mode 100644 mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
 delete mode 100644 mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp

diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index 033e8755501f967..2a891a7d24f809a 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -134,14 +134,17 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
 
 /// Register pass to serialize GPU kernel functions to a CUBIN binary
 /// annotation.
+LLVM_DEPRECATED("use Target attributes instead", "")
 void registerGpuSerializeToCubinPass();
 
 /// Register pass to serialize GPU kernel functions to a HSAco binary
 /// annotation.
+LLVM_DEPRECATED("use Target attributes instead", "")
 void registerGpuSerializeToHsacoPass();
 
 /// Create an instance of the GPU kernel function to CUBIN binary serialization
 /// pass with optLevel (default level 2).
+LLVM_DEPRECATED("use Target attributes instead", "")
 std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
                                                     StringRef chip,
                                                     StringRef features,
@@ -150,6 +153,7 @@ std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
 
 /// Create an instance of the GPU kernel function to HSAco binary serialization
 /// pass.
+LLVM_DEPRECATED("use Target attributes instead", "")
 std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
                                                     StringRef arch,
                                                     StringRef features,
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 8f3f92ae43145d1..f7271737c66d1cb 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -65,8 +65,6 @@ inline void registerAllPasses() {
   bufferization::registerBufferizationPasses();
   func::registerFuncPasses();
   registerGPUPasses();
-  registerGpuSerializeToCubinPass();
-  registerGpuSerializeToHsacoPass();
   registerLinalgPasses();
   registerNVGPUPasses();
   registerSparseTensorPasses();
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
index 3cf530abd744e8e..234a0d82babef67 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/CMakeLists.txt
@@ -27,11 +27,3 @@ add_mlir_dialect_library(MLIRSparseTensorPipelines
   MLIRVectorToLLVM
   MLIRVectorTransforms
 )
-
-if(MLIR_ENABLE_CUDA_RUNNER)
-  # Enable gpu-to-cubin pass.
-  target_compile_definitions(obj.MLIRSparseTensorPipelines
-    PRIVATE
-    MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
-  )
-endif()
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index 24c4c4c43a93dea..37f9e09d34c04e7 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -78,11 +78,13 @@ void mlir::sparse_tensor::buildSparseCompiler(
 
   // Finalize GPU code generation.
   if (gpuCodegen) {
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
-    pm.addNestedPass<gpu::GPUModuleOp>(createGpuSerializeToCubinPass(
-        options.gpuTriple, options.gpuChip, options.gpuFeatures));
-#endif
+    GpuNVVMAttachTargetOptions nvvmTargetOptions;
+    nvvmTargetOptions.triple = options.gpuTriple;
+    nvvmTargetOptions.chip = options.gpuChip;
+    nvvmTargetOptions.features = options.gpuFeatures;
+    pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
     pm.addPass(createGpuToLLVMConversionPass());
+    pm.addPass(createGpuModuleToBinaryPass());
   }
 
   pm.addPass(createReconcileUnrealizedCastsPass());
diff --git a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg b/mlir/test/Conversion/GPUToCUDA/lit.local.cfg
deleted file mode 100644
index bc470ccc5733a96..000000000000000
--- a/mlir/test/Conversion/GPUToCUDA/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not config.run_cuda_tests:
-    config.unsupported = True
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
deleted file mode 100644
index 0a2ac552a7c6db1..000000000000000
--- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: mlir-opt %s --test-gpu-to-cubin | FileCheck %s
-
-// CHECK: gpu.module @foo attributes {gpu.binary = "CUBIN"}
-gpu.module @foo {
-  llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr)
-    // CHECK: attributes  {gpu.kernel}
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}
-
-// CHECK: gpu.module @bar attributes {gpu.binary = "CUBIN"}
-gpu.module @bar {
-  // CHECK: func @kernel_a
-  llvm.func @kernel_a()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-
-  // CHECK: func @kernel_b
-  llvm.func @kernel_b()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}
diff --git a/mlir/test/Conversion/GPUToROCm/lit.local.cfg b/mlir/test/Conversion/GPUToROCm/lit.local.cfg
deleted file mode 100644
index 2f5cc9f3bad9737..000000000000000
--- a/mlir/test/Conversion/GPUToROCm/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not config.run_rocm_tests:
-    config.unsupported = True
diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
deleted file mode 100644
index 8e27de4b60de741..000000000000000
--- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s
-
-// CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"}
-gpu.module @foo {
-  llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr)
-    // CHECK: attributes  {gpu.kernel}
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}
-
-// CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"}
-gpu.module @bar {
-  // CHECK: func @kernel_a
-  llvm.func @kernel_a()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-
-  // CHECK: func @kernel_b
-  llvm.func @kernel_b()
-    attributes  { gpu.kernel } {
-    llvm.return
-  }
-}
diff --git a/mlir/test/lib/Dialect/GPU/CMakeLists.txt b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
index ac96229e80a077e..80edd04b691a571 100644
--- a/mlir/test/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/GPU/CMakeLists.txt
@@ -31,8 +31,6 @@ set(LIBS
   )
 
 add_mlir_library(MLIRGPUTestPasses
-  TestConvertGPUKernelToCubin.cpp
-  TestConvertGPUKernelToHsaco.cpp
   TestGpuMemoryPromotion.cpp
   TestGpuRewrite.cpp
   TestLowerToNVVM.cpp
@@ -43,12 +41,3 @@ add_mlir_library(MLIRGPUTestPasses
   ${LIBS}
   )
 
-# This is how it is defined in mlir/lib/Dialect/GPU/CMakeLists.txt
-# We probably want something better project-wide
-if(MLIR_ENABLE_CUDA_RUNNER)
-  # Enable gpu-to-cubin pass.
-  target_compile_definitions(MLIRGPUTestPasses
-    PRIVATE
-    MLIR_GPU_TO_CUBIN_PASS_ENABLE=1
-  )
-endif()
diff --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
deleted file mode 100644
index 1c442b0147c8b30..000000000000000
--- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToCubin.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===- TestConvertGPUKernelToCubin.cpp - Test gpu kernel cubin lowering ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-#if MLIR_CUDA_CONVERSIONS_ENABLED
-namespace {
-class TestSerializeToCubinPass
-    : public PassWrapper<TestSerializeToCubinPass, gpu::SerializeToBlobPass> {
-public:
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToCubinPass)
-
-  StringRef getArgument() const final { return "test-gpu-to-cubin"; }
-  StringRef getDescription() const final {
-    return "Lower GPU kernel function to CUBIN binary annotations";
-  }
-  TestSerializeToCubinPass();
-
-private:
-  void getDependentDialects(DialectRegistry &registry) const override;
-
-  // Serializes PTX to CUBIN.
-  std::unique_ptr<std::vector<char>>
-  serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-TestSerializeToCubinPass::TestSerializeToCubinPass() {
-  this->triple = "nvptx64-nvidia-cuda";
-  this->chip = "sm_35";
-  this->features = "+ptx60";
-}
-
-void TestSerializeToCubinPass::getDependentDialects(
-    DialectRegistry &registry) const {
-  registerNVVMDialectTranslation(registry);
-  gpu::SerializeToBlobPass::getDependentDialects(registry);
-}
-
-std::unique_ptr<std::vector<char>>
-TestSerializeToCubinPass::serializeISA(const std::string &) {
-  std::string data = "CUBIN";
-  return std::make_unique<std::vector<char>>(data.begin(), data.end());
-}
-
-namespace mlir {
-namespace test {
-// Register test pass to serialize GPU module to a CUBIN binary annotation.
-void registerTestGpuSerializeToCubinPass() {
-  PassRegistration<TestSerializeToCubinPass>([] {
-    // Initialize LLVM NVPTX backend.
-    LLVMInitializeNVPTXTarget();
-    LLVMInitializeNVPTXTargetInfo();
-    LLVMInitializeNVPTXTargetMC();
-    LLVMInitializeNVPTXAsmPrinter();
-
-    return std::make_unique<TestSerializeToCubinPass>();
-  });
-}
-} // namespace test
-} // namespace mlir
-#endif // MLIR_CUDA_CONVERSIONS_ENABLED
diff --git a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp b/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
deleted file mode 100644
index c204e86632ac920..000000000000000
--- a/mlir/test/lib/Dialect/GPU/TestConvertGPUKernelToHsaco.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-//===- TestConvertGPUKernelToHsaco.cpp - Test gpu kernel hsaco lowering ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-
-using namespace mlir;
-
-#if MLIR_ROCM_CONVERSIONS_ENABLED
-namespace {
-class TestSerializeToHsacoPass
-    : public PassWrapper<TestSerializeToHsacoPass, gpu::SerializeToBlobPass> {
-public:
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestSerializeToHsacoPass)
-
-  StringRef getArgument() const final { return "test-gpu-to-hsaco"; }
-  StringRef getDescription() const final {
-    return "Lower GPU kernel function to HSAco binary annotations";
-  }
-  TestSerializeToHsacoPass();
-
-private:
-  void getDependentDialects(DialectRegistry &registry) const override;
-
-  // Serializes ROCDL IR to HSACO.
-  std::unique_ptr<std::vector<char>>
-  serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-TestSerializeToHsacoPass::TestSerializeToHsacoPass() {
-  this->triple = "amdgcn-amd-amdhsa";
-  this->chip = "gfx900";
-}
-
-void TestSerializeToHsacoPass::getDependentDialects(
-    DialectRegistry &registry) const {
-  registerROCDLDialectTranslation(registry);
-  gpu::SerializeToBlobPass::getDependentDialects(registry);
-}
-
-std::unique_ptr<std::vector<char>>
-TestSerializeToHsacoPass::serializeISA(const std::string &) {
-  std::string data = "HSACO";
-  return std::make_unique<std::vector<char>>(data.begin(), data.end());
-}
-
-namespace mlir {
-namespace test {
-// Register test pass to serialize GPU module to a HSAco binary annotation.
-void registerTestGpuSerializeToHsacoPass() {
-  PassRegistration<TestSerializeToHsacoPass>([] {
-    // Initialize LLVM AMDGPU backend.
-    LLVMInitializeAMDGPUTarget();
-    LLVMInitializeAMDGPUTargetInfo();
-    LLVMInitializeAMDGPUTargetMC();
-    LLVMInitializeAMDGPUAsmPrinter();
-
-    return std::make_unique<TestSerializeToHsacoPass>();
-  });
-}
-} // namespace test
-} // namespace mlir
-#endif // MLIR_ROCM_CONVERSIONS_ENABLED
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index a8aeffec1ae72d0..22eca9bcff6ff27 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -80,8 +80,6 @@ void registerTestCallGraphPass();
 void registerTestCfAssertPass();
 void registerTestConstantFold();
 void registerTestControlFlowSink();
-void registerTestGpuSerializeToCubinPass();
-void registerTestGpuSerializeToHsacoPass();
 void registerTestDataLayoutPropagation();
 void registerTestDataLayoutQuery();
 void registerTestDeadCodeAnalysisPass();
@@ -204,11 +202,7 @@ void registerTestPasses() {
   mlir::test::registerTestDiagnosticsPass();
   mlir::test::registerTestDialectConversionPasses();
 #if MLIR_CUDA_CONVERSIONS_ENABLED
-  mlir::test::registerTestGpuSerializeToCubinPass();
   mlir::test::registerTestLowerToNVVM();
-#endif
-#if MLIR_ROCM_CONVERSIONS_ENABLED
-  mlir::test::registerTestGpuSerializeToHsacoPass();
 #endif
   mlir::test::registerTestDecomposeCallGraphTypes();
   mlir::test::registerTestDataLayoutPropagation();