[Mlir-commits] [mlir] f8058a3 - [mlir] Fix nvvm integration tests build error (#70113)
llvmlistbot at llvm.org
Tue Oct 24 13:32:50 PDT 2023
Author: Guray Ozen
Date: 2023-10-24T22:32:46+02:00
New Revision: f8058a37aecbc1f5e53ede5e139f74a397f16f97
URL: https://github.com/llvm/llvm-project/commit/f8058a37aecbc1f5e53ede5e139f74a397f16f97
DIFF: https://github.com/llvm/llvm-project/commit/f8058a37aecbc1f5e53ede5e139f74a397f16f97.diff
LOG: [mlir] Fix nvvm integration tests build error (#70113)
#69934 broke integration tests that rely on the
kernel-bare-ptr-calling-convention and host-bare-ptr-calling-convention
flags. This PR brings these flags back.
The kernel-index-bitwidth flag is also removed, since the kernel's
pointer size depends on the host: keeping a 64-bit host while lowering
the kernel with a 32-bit index bitwidth is not viable.
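For reference, a hypothetical invocation exercising the restored flags,
modeled on the RUN lines in the tests below (input.mlir is a placeholder,
the "=true" boolean syntax is an assumption, and %gpu_compilation_format,
%mlir_cuda_runtime, and %mlir_runner_utils are lit substitutions that
would be concrete values or paths outside the test harness):

  mlir-opt input.mlir \
    -test-lower-to-nvvm="kernel-bare-ptr-calling-convention=true host-bare-ptr-calling-convention=true cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
    | mlir-cpu-runner \
      --shared-libs=%mlir_cuda_runtime \
      --shared-libs=%mlir_runner_utils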
Added:
Modified:
mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
Removed:
################################################################################
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
index 26bf448a97f8123..c9f45ddad6ffcfb 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter \
// RUN: -test-transform-dialect-erase-schedule \
-// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN: -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
index 4ea72f3b82c2640..367b4f32ede386d 100644
--- a/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
+++ b/mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir
@@ -11,7 +11,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter \
// RUN: -test-transform-dialect-erase-schedule \
-// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN: -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
index ed7634fbecf49fd..28f76bde0820a6e 100644
--- a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
@@ -65,6 +65,18 @@ struct TestLowerToNVVMOptions
*this, "opt-level",
llvm::cl::desc("Optimization level for NVVM compilation"),
llvm::cl::init(2)};
+ PassOptions::Option<bool> kernelUseBarePtrCallConv{
+ *this, "kernel-bare-ptr-calling-convention",
+ llvm::cl::desc(
+ "Whether to use the bareptr calling convention on the kernel "
+ "(warning this should be false until the GPU layering is fixed)"),
+ llvm::cl::init(false)};
+ PassOptions::Option<bool> hostUseBarePtrCallConv{
+ *this, "host-bare-ptr-calling-convention",
+ llvm::cl::desc(
+ "Whether to use the bareptr calling convention on the host (warning "
+ "this should be false until the GPU layering is fixed)"),
+ llvm::cl::init(false)};
};
//===----------------------------------------------------------------------===//
@@ -105,7 +117,10 @@ void buildCommonPassPipeline(OpPassManager &pm,
void buildGpuPassPipeline(OpPassManager &pm,
const TestLowerToNVVMOptions &options) {
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+ ConvertGpuOpsToNVVMOpsOptions opt;
+ opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
+ opt.indexBitwidth = options.indexBitWidth;
+ pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps(opt));
pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
@@ -116,7 +131,10 @@ void buildGpuPassPipeline(OpPassManager &pm,
//===----------------------------------------------------------------------===//
void buildHostPostPipeline(OpPassManager &pm,
const TestLowerToNVVMOptions &options) {
- pm.addPass(createGpuToLLVMConversionPass());
+ GpuToLLVMConversionPassOptions opt;
+ opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
+ opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
+ pm.addPass(createGpuToLLVMConversionPass(opt));
GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;