[Mlir-commits] [mlir] [mlir] Fix nvvm integration tests build error (PR #70113)
Guray Ozen
llvmlistbot at llvm.org
Tue Oct 24 13:20:53 PDT 2023
https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/70113
>From 7e0020b31688f7d1c7d7fa1b44a8efb3b8f0390e Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 24 Oct 2023 22:10:28 +0200
Subject: [PATCH 1/2] [mlir] Fix nvvm integration tests build error
#69934 broke the build of some integration tests. Appereantly, these tests are relying two flags: kernel-bare-ptr-calling-convention, and host-bare-ptr-calling-convention. So this PR brings them back.
---
mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp | 22 +++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
index ed7634fbecf49fd..28f76bde0820a6e 100644
--- a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
@@ -65,6 +65,18 @@ struct TestLowerToNVVMOptions
*this, "opt-level",
llvm::cl::desc("Optimization level for NVVM compilation"),
llvm::cl::init(2)};
+ PassOptions::Option<bool> kernelUseBarePtrCallConv{
+ *this, "kernel-bare-ptr-calling-convention",
+ llvm::cl::desc(
+ "Whether to use the bareptr calling convention on the kernel "
+ "(warning this should be false until the GPU layering is fixed)"),
+ llvm::cl::init(false)};
+ PassOptions::Option<bool> hostUseBarePtrCallConv{
+ *this, "host-bare-ptr-calling-convention",
+ llvm::cl::desc(
+ "Whether to use the bareptr calling convention on the host (warning "
+ "this should be false until the GPU layering is fixed)"),
+ llvm::cl::init(false)};
};
//===----------------------------------------------------------------------===//
@@ -105,7 +117,10 @@ void buildCommonPassPipeline(OpPassManager &pm,
void buildGpuPassPipeline(OpPassManager &pm,
const TestLowerToNVVMOptions &options) {
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+ ConvertGpuOpsToNVVMOpsOptions opt;
+ opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
+ opt.indexBitwidth = options.indexBitWidth;
+ pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps(opt));
pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
@@ -116,7 +131,10 @@ void buildGpuPassPipeline(OpPassManager &pm,
//===----------------------------------------------------------------------===//
void buildHostPostPipeline(OpPassManager &pm,
const TestLowerToNVVMOptions &options) {
- pm.addPass(createGpuToLLVMConversionPass());
+ GpuToLLVMConversionPassOptions opt;
+ opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
+ opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
+ pm.addPass(createGpuToLLVMConversionPass(opt));
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
>From 8f5fbffea67153233c51c36df9fada1564507459 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 24 Oct 2023 22:20:32 +0200
Subject: [PATCH 2/2] remove kernel-index-bitwidth
---
.../sm80/transform-mma-sync-matmul-f16-f16-accum.mlir | 2 +-
.../GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
index 26bf448a97f8123..c9f45ddad6ffcfb 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter \
// RUN: -test-transform-dialect-erase-schedule \
-// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN: -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
index 4ea72f3b82c2640..367b4f32ede386d 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
@@ -11,7 +11,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter \
// RUN: -test-transform-dialect-erase-schedule \
-// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN: -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \
More information about the Mlir-commits
mailing list