[Mlir-commits] [mlir] [mlir] Fix nvvm integration tests build error (PR #70113)

Tue Oct 24 13:20:53 PDT 2023

https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/70113

>From 7e0020b31688f7d1c7d7fa1b44a8efb3b8f0390e Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 24 Oct 2023 22:10:28 +0200
Subject: [PATCH 1/2] [mlir] Fix nvvm integration tests build error

#69934 broke the build of some integration tests. Appereantly, these tests are relying two flags: kernel-bare-ptr-calling-convention, and host-bare-ptr-calling-convention. So this PR brings them back.
---
 mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
index ed7634fbecf49fd..28f76bde0820a6e 100644
--- a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
@@ -65,6 +65,18 @@ struct TestLowerToNVVMOptions
       *this, "opt-level",
       llvm::cl::desc("Optimization level for NVVM compilation"),
       llvm::cl::init(2)};
+  PassOptions::Option<bool> kernelUseBarePtrCallConv{
+      *this, "kernel-bare-ptr-calling-convention",
+      llvm::cl::desc(
+          "Whether to use the bareptr calling convention on the kernel "
+          "(warning this should be false until the GPU layering is fixed)"),
+      llvm::cl::init(false)};
+  PassOptions::Option<bool> hostUseBarePtrCallConv{
+      *this, "host-bare-ptr-calling-convention",
+      llvm::cl::desc(
+          "Whether to use the bareptr calling convention on the host (warning "
+          "this should be false until the GPU layering is fixed)"),
+      llvm::cl::init(false)};
 };
 
 //===----------------------------------------------------------------------===//
@@ -105,7 +117,10 @@ void buildCommonPassPipeline(OpPassManager &pm,
 void buildGpuPassPipeline(OpPassManager &pm,
                           const TestLowerToNVVMOptions &options) {
   pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+  ConvertGpuOpsToNVVMOpsOptions opt;
+  opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
+  opt.indexBitwidth = options.indexBitWidth;
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps(opt));
   pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
   pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
   pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
@@ -116,7 +131,10 @@ void buildGpuPassPipeline(OpPassManager &pm,
 //===----------------------------------------------------------------------===//
 void buildHostPostPipeline(OpPassManager &pm,
                            const TestLowerToNVVMOptions &options) {
-  pm.addPass(createGpuToLLVMConversionPass());
+  GpuToLLVMConversionPassOptions opt;
+  opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
+  opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
+  pm.addPass(createGpuToLLVMConversionPass(opt));
 
   GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
   gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;

>From 8f5fbffea67153233c51c36df9fada1564507459 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 24 Oct 2023 22:20:32 +0200
Subject: [PATCH 2/2] remove kernel-index-bitwidth

---
 .../sm80/transform-mma-sync-matmul-f16-f16-accum.mlir           | 2 +-
 .../GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
index 26bf448a97f8123..c9f45ddad6ffcfb 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN:  -transform-interpreter \
 // RUN:  -test-transform-dialect-erase-schedule \
-// RUN:  -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN:  -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%mlir_cuda_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
index 4ea72f3b82c2640..367b4f32ede386d 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
@@ -11,7 +11,7 @@
 // RUN: mlir-opt %s \
 // RUN:   -transform-interpreter \
 // RUN:   -test-transform-dialect-erase-schedule \
-// RUN:   -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN:   -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
 // RUN: | mlir-cpu-runner \
 // RUN:   --shared-libs=%mlir_cuda_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \