[Mlir-commits] [mlir] [MLIR] Fixes NVGPU Integration Test Passes (PR #69934)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Oct 23 08:17:35 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-gpu
Author: Guray Ozen (grypp)
<details>
<summary>Changes</summary>
The `test-lower-to-nvvm` pipeline, designed for the NVGPU dialect within GPU kernels, plays an important role in compiling integration tests. This PR restructures the passes and cleans up the code.
---
Full diff: https://github.com/llvm/llvm-project/pull/69934.diff
1 Files Affected:
- (modified) mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp (+55-227)
``````````diff
diff --git a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
index b5af22f23a77cbc..c4cc0d5ae38d9be 100644
--- a/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
+++ b/mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp
@@ -28,6 +28,8 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
@@ -39,27 +41,11 @@ using namespace mlir;
namespace {
struct TestLowerToNVVMOptions
: public PassPipelineOptions<TestLowerToNVVMOptions> {
- PassOptions::Option<int64_t> hostIndexBitWidth{
- *this, "host-index-bitwidth",
+ PassOptions::Option<int64_t> indexBitWidth{
+ *this, "index-bitwidth",
llvm::cl::desc("Bitwidth of the index type for the host (warning this "
"should be 64 until the GPU layering is fixed)"),
llvm::cl::init(64)};
- PassOptions::Option<bool> hostUseBarePtrCallConv{
- *this, "host-bare-ptr-calling-convention",
- llvm::cl::desc(
- "Whether to use the bareptr calling convention on the host (warning "
- "this should be false until the GPU layering is fixed)"),
- llvm::cl::init(false)};
- PassOptions::Option<int64_t> kernelIndexBitWidth{
- *this, "kernel-index-bitwidth",
- llvm::cl::desc("Bitwidth of the index type for the GPU kernels"),
- llvm::cl::init(64)};
- PassOptions::Option<bool> kernelUseBarePtrCallConv{
- *this, "kernel-bare-ptr-calling-convention",
- llvm::cl::desc(
- "Whether to use the bareptr calling convention on the kernel "
- "(warning this should be false until the GPU layering is fixed)"),
- llvm::cl::init(false)};
PassOptions::Option<std::string> cubinTriple{
*this, "cubin-triple",
llvm::cl::desc("Triple to use to serialize to cubin."),
@@ -74,175 +60,78 @@ struct TestLowerToNVVMOptions
PassOptions::Option<std::string> cubinFormat{
*this, "cubin-format",
llvm::cl::desc("Compilation format to use to serialize to cubin."),
- llvm::cl::init("isa")};
+ llvm::cl::init("bin")};
PassOptions::Option<int> optLevel{
*this, "opt-level",
llvm::cl::desc("Optimization level for NVVM compilation"),
llvm::cl::init(2)};
};
+//===----------------------------------------------------------------------===//
+// Common pipeline
+//===----------------------------------------------------------------------===//
+void buildCommonPassPipeline(OpPassManager &pm,
+ const TestLowerToNVVMOptions &options) {
+ pm.addPass(createConvertNVGPUToNVVMPass());
+ pm.addPass(createGpuKernelOutliningPass());
+ pm.addPass(createConvertLinalgToLoopsPass());
+ pm.addPass(createConvertVectorToSCFPass());
+ pm.addPass(createConvertSCFToCFPass());
+ pm.addPass(createConvertNVVMToLLVMPass());
+ pm.addPass(createConvertVectorToLLVMPass());
+ pm.addPass(createConvertMathToLLVMPass());
+ pm.addPass(createFinalizeMemRefToLLVMConversionPass());
+ pm.addPass(createConvertFuncToLLVMPass());
+ pm.addPass(memref::createExpandStridedMetadataPass());
+
+ GpuNVVMAttachTargetOptions nvvmTargetOptions;
+ nvvmTargetOptions.triple = options.cubinTriple;
+ nvvmTargetOptions.chip = options.cubinChip;
+ nvvmTargetOptions.features = options.cubinFeatures;
+ nvvmTargetOptions.optLevel = options.optLevel;
+ pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
+ pm.addPass(createLowerAffinePass());
+ pm.addPass(createArithToLLVMConversionPass());
+ ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
+ convertIndexToLLVMPassOpt.indexBitwidth = options.indexBitWidth;
+ pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
+ pm.addPass(createCanonicalizerPass());
+ pm.addPass(createCSEPass());
+}
+
//===----------------------------------------------------------------------===//
// GPUModule-specific stuff.
//===----------------------------------------------------------------------===//
void buildGpuPassPipeline(OpPassManager &pm,
const TestLowerToNVVMOptions &options) {
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+}
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertVectorToSCFPass());
- // Convert SCF to CF (always needed).
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
- // Convert Math to LLVM (always needed).
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToLLVMPass());
- // Expand complicated MemRef operations before lowering them.
- pm.addNestedPass<gpu::GPUModuleOp>(memref::createExpandStridedMetadataPass());
- // The expansion may create affine expressions. Get rid of them.
- pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
-
- // Convert MemRef to LLVM (always needed).
- // TODO: C++20 designated initializers.
- FinalizeMemRefToLLVMConversionPassOptions
- finalizeMemRefToLLVMConversionPassOptions;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- finalizeMemRefToLLVMConversionPassOptions.indexBitwidth =
- options.kernelIndexBitWidth;
- finalizeMemRefToLLVMConversionPassOptions.useOpaquePointers = true;
- pm.addNestedPass<gpu::GPUModuleOp>(createFinalizeMemRefToLLVMConversionPass(
- finalizeMemRefToLLVMConversionPassOptions));
-
- // Convert Func to LLVM (always needed).
- // TODO: C++20 designated initializers.
- ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- convertFuncToLLVMPassOptions.indexBitwidth = options.kernelIndexBitWidth;
- convertFuncToLLVMPassOptions.useBarePtrCallConv =
- options.kernelUseBarePtrCallConv;
- convertFuncToLLVMPassOptions.useOpaquePointers = true;
- pm.addNestedPass<gpu::GPUModuleOp>(
- createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions));
-
- // TODO: C++20 designated initializers.
- ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- convertIndexToLLVMPassOpt.indexBitwidth = options.kernelIndexBitWidth;
- pm.addNestedPass<gpu::GPUModuleOp>(
- createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
-
- // TODO: C++20 designated initializers.
- // The following pass is inconsistent.
- // TODO: fix inconsistence.
- ConvertGpuOpsToNVVMOpsOptions convertGpuOpsToNVVMOpsOptions;
- convertGpuOpsToNVVMOpsOptions.useBarePtrCallConv =
- options.kernelUseBarePtrCallConv;
- convertGpuOpsToNVVMOpsOptions.indexBitwidth = options.kernelIndexBitWidth;
- convertGpuOpsToNVVMOpsOptions.useOpaquePointers = true;
- pm.addNestedPass<gpu::GPUModuleOp>(
- createConvertGpuOpsToNVVMOps(convertGpuOpsToNVVMOpsOptions));
-
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
-
- // Convert vector to LLVM (always needed).
- // TODO: C++20 designated initializers.
- ConvertVectorToLLVMPassOptions convertVectorToLLVMPassOptions;
- convertVectorToLLVMPassOptions.reassociateFPReductions = true;
- pm.addNestedPass<gpu::GPUModuleOp>(
- createConvertVectorToLLVMPass(convertVectorToLLVMPassOptions));
-
- // This pass is needed for PTX building
- pm.addNestedPass<gpu::GPUModuleOp>(createConvertNVVMToLLVMPass());
+//===----------------------------------------------------------------------===//
+// Host Post-GPU pipeline
+//===----------------------------------------------------------------------===//
+void buildHostPostPipeline(OpPassManager &pm,
+ const TestLowerToNVVMOptions &options) {
+ pm.addPass(createGpuToLLVMConversionPass());
- // Sprinkle some cleanups.
+ GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
+ gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
+ pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
pm.addPass(createCanonicalizerPass());
pm.addPass(createCSEPass());
-
- // Finally we can reconcile unrealized casts.
- pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+ pm.addPass(createReconcileUnrealizedCastsPass());
}
void buildLowerToNVVMPassPipeline(OpPassManager &pm,
const TestLowerToNVVMOptions &options) {
- // Start with a cleanup pass.
- pm.addPass(createCanonicalizerPass());
- pm.addPass(createCSEPass());
-
//===----------------------------------------------------------------------===//
- // NVGPU lowers device code as well as host code to the driver, so must run
- // before outlining.
+ // Common pipeline
//===----------------------------------------------------------------------===//
- // TODO: C++20 designated initializers.
- ConvertNVGPUToNVVMPassOptions convertNVGPUToNVVMPassOptions;
- convertNVGPUToNVVMPassOptions.useOpaquePointers = true;
- pm.addNestedPass<func::FuncOp>(
- createConvertNVGPUToNVVMPass(convertNVGPUToNVVMPassOptions));
-
- //===----------------------------------------------------------------------===//
- // Host-specific stuff.
- //===----------------------------------------------------------------------===//
- // Important, must be run at the top-level.
- pm.addPass(createGpuKernelOutliningPass());
-
- // Important, all host passes must be run at the func level so that host
- // conversions can remain with 64 bit indices without polluting the GPU
- // kernel that may have 32 bit indices.
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
- // Convert SCF to CF (always needed).
- pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
- // Convert Math to LLVM (always needed).
- pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
- // Expand complicated MemRef operations before lowering them.
- pm.addNestedPass<func::FuncOp>(memref::createExpandStridedMetadataPass());
- // The expansion may create affine expressions. Get rid of them.
- pm.addNestedPass<func::FuncOp>(createLowerAffinePass());
-
- // Convert MemRef to LLVM (always needed).
- // TODO: C++20 designated initializers.
- FinalizeMemRefToLLVMConversionPassOptions
- finalizeMemRefToLLVMConversionPassOptions;
- finalizeMemRefToLLVMConversionPassOptions.useAlignedAlloc = true;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- finalizeMemRefToLLVMConversionPassOptions.indexBitwidth =
- options.hostIndexBitWidth;
- finalizeMemRefToLLVMConversionPassOptions.useOpaquePointers = true;
- pm.addNestedPass<func::FuncOp>(createFinalizeMemRefToLLVMConversionPass(
- finalizeMemRefToLLVMConversionPassOptions));
-
- // Convert Func to LLVM (always needed).
- // TODO: C++20 designated initializers.
- ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- convertFuncToLLVMPassOptions.indexBitwidth = options.hostIndexBitWidth;
- convertFuncToLLVMPassOptions.useBarePtrCallConv =
- options.hostUseBarePtrCallConv;
- convertFuncToLLVMPassOptions.useOpaquePointers = true;
- pm.addNestedPass<func::FuncOp>(
- createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions));
-
- // TODO: C++20 designated initializers.
- ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- convertIndexToLLVMPassOpt.indexBitwidth = options.hostIndexBitWidth;
- pm.addNestedPass<func::FuncOp>(
- createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
-
- pm.addNestedPass<func::FuncOp>(createArithToLLVMConversionPass());
-
- // Sprinkle some cleanups.
- pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
- pm.addNestedPass<func::FuncOp>(createCSEPass());
+ buildCommonPassPipeline(pm, options);
//===----------------------------------------------------------------------===//
// GPUModule-specific stuff.
@@ -252,68 +141,7 @@ void buildLowerToNVVMPassPipeline(OpPassManager &pm,
//===----------------------------------------------------------------------===//
// Host post-GPUModule-specific stuff.
//===----------------------------------------------------------------------===//
- // Attach an NVVM target to all the GPU modules with the provided target
- // options.
- // TODO: C++20 designated initializers.
- GpuNVVMAttachTargetOptions nvvmTargetOptions;
- nvvmTargetOptions.triple = options.cubinTriple;
- nvvmTargetOptions.chip = options.cubinChip;
- nvvmTargetOptions.features = options.cubinFeatures;
- nvvmTargetOptions.optLevel = options.optLevel;
- pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
-
- // Convert GPU to LLVM.
- // TODO: C++20 designated initializers.
- GpuToLLVMConversionPassOptions gpuToLLVMConversionOptions;
- // Note: hostBarePtrCallConv must be false for now otherwise
- // gpu::HostRegister is ill-defined: it wants unranked memrefs but can't
- // lower the to bare ptr.
- gpuToLLVMConversionOptions.hostBarePtrCallConv =
- options.hostUseBarePtrCallConv;
- gpuToLLVMConversionOptions.kernelBarePtrCallConv =
- options.kernelUseBarePtrCallConv;
- gpuToLLVMConversionOptions.useOpaquePointers = true;
-
- // TODO: something useful here.
- // gpuToLLVMConversionOptions.gpuBinaryAnnotation = "";
- pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMConversionOptions));
-
- // Serialize all GPU modules to binaries.
- GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
- gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
- pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
-
- // Convert vector to LLVM (always needed).
- // TODO: C++20 designated initializers.
- ConvertVectorToLLVMPassOptions convertVectorToLLVMPassOptions;
- convertVectorToLLVMPassOptions.reassociateFPReductions = true;
- pm.addNestedPass<func::FuncOp>(
- createConvertVectorToLLVMPass(convertVectorToLLVMPassOptions));
-
- ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt3;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- // TODO: fix GPU layering.
- convertIndexToLLVMPassOpt3.indexBitwidth = options.hostIndexBitWidth;
- pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt3));
-
- // Convert Func to LLVM (always needed).
- // TODO: C++20 designated initializers.
- ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions2;
- // Must be 64b on the host, things don't compose properly around
- // gpu::LaunchOp and gpu::HostRegisterOp.
- convertFuncToLLVMPassOptions2.indexBitwidth = options.hostIndexBitWidth;
- convertFuncToLLVMPassOptions2.useBarePtrCallConv =
- options.hostUseBarePtrCallConv;
- convertFuncToLLVMPassOptions2.useOpaquePointers = true;
- pm.addPass(createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions2));
-
- // Sprinkle some cleanups.
- pm.addPass(createCanonicalizerPass());
- pm.addPass(createCSEPass());
-
- // Finally we can reconcile unrealized casts.
- pm.addPass(createReconcileUnrealizedCastsPass());
+ buildHostPostPipeline(pm, options);
}
} // namespace
``````````
</details>
https://github.com/llvm/llvm-project/pull/69934
More information about the Mlir-commits
mailing list