[Mlir-commits] [mlir] eb8d6af - [mlir] Specify cuda-runner pass pipeline as command line options.
Christian Sigg
llvmlistbot at llvm.org
Wed Feb 24 05:37:05 PST 2021
Author: Christian Sigg
Date: 2021-02-24T14:36:52+01:00
New Revision: eb8d6af5e406f1dafb2f742bc9994ffbc8a6ea76
URL: https://github.com/llvm/llvm-project/commit/eb8d6af5e406f1dafb2f742bc9994ffbc8a6ea76
DIFF: https://github.com/llvm/llvm-project/commit/eb8d6af5e406f1dafb2f742bc9994ffbc8a6ea76.diff
LOG: [mlir] Specify cuda-runner pass pipeline as command line options.
The cuda-runner registers two pass pipelines for nested passes,
so that we don't have to use a verbose textual pass pipeline specification.
Reviewed By: herhut
Differential Revision: https://reviews.llvm.org/D97091
Added:
Modified:
mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
mlir/test/mlir-cuda-runner/all-reduce-and.mlir
mlir/test/mlir-cuda-runner/all-reduce-max.mlir
mlir/test/mlir-cuda-runner/all-reduce-min.mlir
mlir/test/mlir-cuda-runner/all-reduce-op.mlir
mlir/test/mlir-cuda-runner/all-reduce-or.mlir
mlir/test/mlir-cuda-runner/all-reduce-region.mlir
mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
mlir/test/mlir-cuda-runner/async.mlir
mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
mlir/test/mlir-cuda-runner/shuffle.mlir
mlir/test/mlir-cuda-runner/two-modules.mlir
mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
index 1b9e36180114..35d8c9aeb246 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
@@ -93,9 +93,9 @@ class GpuKernelToBlobPass
LoweringCallback loweringCallback;
BlobGenerator blobGenerator;
llvm::Triple triple;
- StringRef targetChip;
- StringRef features;
- StringRef blobAnnotation;
+ std::string targetChip;
+ std::string features;
+ std::string blobAnnotation;
};
} // anonymous namespace
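
A note on this first hunk: llvm::StringRef is a non-owning view, so storing one in the pass is only safe while the original characters stay alive. Once the annotation string comes from a pipeline option (see the pipeline registration further down) rather than a string literal, that lifetime guarantee is gone, hence the switch to owning std::string members. A minimal, self-contained sketch of the hazard, using std::string_view as a stand-in for llvm::StringRef (both are non-owning):

  #include <iostream>
  #include <string>
  #include <string_view>

  // Non-owning view, like the old StringRef members: it only points at
  // characters owned by someone else.
  struct ViewPass {
    std::string_view blobAnnotation;
  };

  // Owning copy, like the new std::string members.
  struct CopyPass {
    std::string blobAnnotation;
  };

  std::string makeAnnotation() { return "nvvm.cubin"; }

  int main() {
    // Each temporary returned by makeAnnotation() dies at the end of its
    // full expression.
    ViewPass viewPass{makeAnnotation()}; // view dangles; reading it is UB
    CopyPass copyPass{makeAnnotation()}; // characters copied; always safe
    std::cout << copyPass.blobAnnotation << "\n"; // prints: nvvm.cubin
    return 0;
  }
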
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir b/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
index aa2391922889..0818a58d1518 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-and.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir b/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
index c6d7293c509d..10c2fedc8563 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-max.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir b/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
index da0467699e88..7e83f911cd29 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-min.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir b/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
index 579ea9b1b2fa..f211ae474b10 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-op.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir b/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
index 9b27f065f21d..cb30391c23d4 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-or.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir b/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
index 2f234489196c..0e097b53b1ad 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-region.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir b/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
index d2fd68a64190..7d6e690eb592 100644
--- a/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
+++ b/mlir/test/mlir-cuda-runner/all-reduce-xor.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/async.mlir b/mlir/test/mlir-cuda-runner/async.mlir
index 19eb6af374c8..efc028743b1a 100644
--- a/mlir/test/mlir-cuda-runner/async.mlir
+++ b/mlir/test/mlir-cuda-runner/async.mlir
@@ -1,7 +1,12 @@
-// RUN: mlir-cuda-runner %s --entry-point-result=void -O0 \
+// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-async-region -async-ref-counting \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -async-to-async-runtime -convert-async-to-llvm -convert-std-to-llvm \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
+// RUN: --entry-point-result=void -O0 \
// RUN: | FileCheck %s
func @main() {
diff --git a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir b/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
index 902c45c704b9..f5ce4bb37d60 100644
--- a/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
+++ b/mlir/test/mlir-cuda-runner/gpu-to-cubin.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir b/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
index 6eabb14e12fd..0ebc05719d07 100644
--- a/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
+++ b/mlir/test/mlir-cuda-runner/multiple-all-reduce.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/shuffle.mlir b/mlir/test/mlir-cuda-runner/shuffle.mlir
index fdd086ac1a40..3af479c37221 100644
--- a/mlir/test/mlir-cuda-runner/shuffle.mlir
+++ b/mlir/test/mlir-cuda-runner/shuffle.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/mlir-cuda-runner/two-modules.mlir
index 93a6817b24b0..de3fe848ac1c 100644
--- a/mlir/test/mlir-cuda-runner/two-modules.mlir
+++ b/mlir/test/mlir-cuda-runner/two-modules.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-cuda-runner %s \
+// RUN: -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" \
+// RUN: -gpu-to-llvm="gpu-binary-annotation=nvvm.cubin" \
// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
index 065173032216..025b8133d8d9 100644
--- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
+++ b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
@@ -14,11 +14,9 @@
#include "llvm/ADT/STLExtras.h"
-#include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Conversion/Passes.h"
#include "mlir/Dialect/Async/IR/Async.h"
#include "mlir/Dialect/Async/Passes.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
@@ -44,35 +42,36 @@
using namespace mlir;
-inline void emit_cuda_error(const llvm::Twine &message, const char *buffer,
- CUresult error, Location loc) {
- emitError(loc, message.concat(" failed with error code ")
+static void emitCudaError(const llvm::Twine &expr, const char *buffer,
+ CUresult result, Location loc) {
+ const char *error;
+ cuGetErrorString(result, &error);
+ emitError(loc, expr.concat(" failed with error code ")
.concat(llvm::Twine{error})
.concat("[")
.concat(buffer)
.concat("]"));
}
-#define RETURN_ON_CUDA_ERROR(expr, msg) \
- { \
- auto _cuda_error = (expr); \
- if (_cuda_error != CUDA_SUCCESS) { \
- emit_cuda_error(msg, jitErrorBuffer, _cuda_error, loc); \
+#define RETURN_ON_CUDA_ERROR(expr) \
+ do { \
+ if (auto status = (expr)) { \
+ emitCudaError(#expr, jitErrorBuffer, status, loc); \
return {}; \
} \
- }
+ } while (false)
OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
StringRef name) {
char jitErrorBuffer[4096] = {0};
- RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit");
+ RETURN_ON_CUDA_ERROR(cuInit(0));
// Linking requires a device context.
CUdevice device;
- RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0), "cuDeviceGet");
+ RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
CUcontext context;
- RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device), "cuCtxCreate");
+ RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device));
CUlinkState linkState;
CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
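
Two details of the reworked macro above are worth spelling out. First, emitCudaError now calls cuGetErrorString, so the CUresult is reported as readable text rather than a bare number. Second, the stringizing operator #expr turns the failing call expression itself into the message, which is why the separate msg argument could be dropped, and the do { ... } while (false) wrapper makes the macro expand to a single statement that composes safely with unbraced if/else. A self-contained sketch of the same pattern, with a hypothetical fakeCudaCall standing in for the CUDA driver API (0 means success, like CUDA_SUCCESS):

  #include <cstdio>

  #define RETURN_ON_ERROR(expr)                                              \
    do {                                                                     \
      if (int status = (expr)) {                                             \
        std::fprintf(stderr, "%s failed with error code %d\n", #expr,        \
                     status);                                                \
        return status;                                                       \
      }                                                                      \
    } while (false)

  // Hypothetical stand-in for a CUDA driver call; 0 means success.
  static int fakeCudaCall(bool fail) { return fail ? 218 : 0; }

  static int run() {
    RETURN_ON_ERROR(fakeCudaCall(/*fail=*/false)); // succeeds, continues
    // Because the macro expands to one statement, it is safe without braces:
    if (true)
      RETURN_ON_ERROR(fakeCudaCall(/*fail=*/true)); // reports and returns 218
    return 0;
  }

  int main() { return run() != 0; }
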
@@ -83,8 +82,7 @@ OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
jitOptions, /* jit options */
jitOptionsVals, /* jit option values */
- &linkState),
- "cuLinkCreate");
+ &linkState));
RETURN_ON_CUDA_ERROR(
cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
@@ -93,51 +91,69 @@ OwnedBlob compilePtxToCubin(const std::string ptx, Location loc,
0, /* number of jit options */
nullptr, /* jit options */
nullptr /* jit option values */
- ),
- "cuLinkAddData");
+ ));
void *cubinData;
size_t cubinSize;
- RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize),
- "cuLinkComplete");
+ RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
char *cubinAsChar = static_cast<char *>(cubinData);
OwnedBlob result =
std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
// This will also destroy the cubin data.
- RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState), "cuLinkDestroy");
- RETURN_ON_CUDA_ERROR(cuCtxDestroy(context), "cuCtxDestroy");
+ RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
+ RETURN_ON_CUDA_ERROR(cuCtxDestroy(context));
return result;
}
-static LogicalResult runMLIRPasses(ModuleOp m) {
- PassManager pm(m.getContext());
+struct GpuToCubinPipelineOptions
+ : public mlir::PassPipelineOptions<GpuToCubinPipelineOptions> {
+ Option<std::string> gpuBinaryAnnotation{
+ *this, "gpu-binary-annotation",
+ llvm::cl::desc("Annotation attribute string for GPU binary")};
+};
+
+// Register cuda-runner specific passes.
+static void registerCudaRunnerPasses() {
+ PassPipelineRegistration<GpuToCubinPipelineOptions> registerGpuToCubin(
+ "gpu-to-cubin", "Generate CUBIN from gpu.launch regions",
+ [&](OpPassManager &pm, const GpuToCubinPipelineOptions &options) {
+ pm.addPass(createGpuKernelOutliningPass());
+ auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
+ kernelPm.addPass(createStripDebugInfoPass());
+ kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
+ kernelPm.addPass(createConvertGPUKernelToBlobPass(
+ translateModuleToLLVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
+ "sm_35", "+ptx60", options.gpuBinaryAnnotation));
+ });
+ registerGPUPasses();
+ registerGpuToLLVMConversionPassPass();
+ registerAsyncPasses();
+ registerConvertAsyncToLLVMPass();
+ registerConvertStandardToLLVMPass();
+}
+
+static LogicalResult runMLIRPasses(ModuleOp module,
+ PassPipelineCLParser &passPipeline) {
+ PassManager pm(module.getContext(), PassManager::Nesting::Implicit);
applyPassManagerCLOptions(pm);
- const char gpuBinaryAnnotation[] = "nvvm.cubin";
- pm.addPass(createGpuKernelOutliningPass());
- auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
- kernelPm.addPass(createStripDebugInfoPass());
- kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
- kernelPm.addPass(createConvertGPUKernelToBlobPass(
- translateModuleToLLVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
- "sm_35", "+ptx60", gpuBinaryAnnotation));
- auto &funcPm = pm.nest<FuncOp>();
- funcPm.addPass(createGpuAsyncRegionPass());
- funcPm.addPass(createAsyncRefCountingPass());
- pm.addPass(createGpuToLLVMConversionPass(gpuBinaryAnnotation));
- pm.addPass(createAsyncToAsyncRuntimePass());
- pm.addPass(createConvertAsyncToLLVMPass());
- mlir::LowerToLLVMOptions lower_to_llvm_opts;
- pm.addPass(mlir::createLowerToLLVMPass(lower_to_llvm_opts));
-
- return pm.run(m);
+ auto errorHandler = [&](const Twine &msg) {
+ emitError(UnknownLoc::get(module.getContext())) << msg;
+ return failure();
+ };
+
+ // Build the provided pipeline.
+ if (failed(passPipeline.addToPipeline(pm, errorHandler)))
+ return failure();
+
+ // Run the pipeline.
+ return pm.run(module);
}
int main(int argc, char **argv) {
- registerPassManagerCLOptions();
llvm::InitLLVM y(argc, argv);
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
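
The PassPipelineRegistration in registerCudaRunnerPasses above is a general mechanism: it maps a pipeline name to a builder callback, and each field of the PassPipelineOptions struct becomes a key=value argument of the resulting flag, which is what lets the tests write -gpu-to-cubin="gpu-binary-annotation=nvvm.cubin" instead of a verbose textual pipeline. A sketch of the same mechanism with a hypothetical pipeline and option (my-pipeline and annotation are illustrative, not part of this patch); llvm::cl::init supplies a default so the bare flag works too:

  #include "mlir/Pass/Pass.h"
  #include "mlir/Pass/PassRegistry.h"
  #include "mlir/Transforms/Passes.h"

  using namespace mlir;

  // Hypothetical options struct; each Option surfaces as key=value inside
  // the registered flag, e.g. -my-pipeline="annotation=foo.blob".
  struct MyPipelineOptions : public PassPipelineOptions<MyPipelineOptions> {
    Option<std::string> annotation{
        *this, "annotation", llvm::cl::desc("Attribute name to attach"),
        llvm::cl::init("my.blob")}; // default, so a bare -my-pipeline works
  };

  // Call once before command-line parsing, as main() does for
  // registerCudaRunnerPasses() in this patch.
  void registerMyPipeline() {
    PassPipelineRegistration<MyPipelineOptions> registration(
        "my-pipeline", "Toy pipeline demonstrating option plumbing",
        [](OpPassManager &pm, const MyPipelineOptions &options) {
          // A real pipeline would forward options.annotation to its passes,
          // as registerGpuToCubin does with gpuBinaryAnnotation above.
          pm.addPass(createCanonicalizerPass());
        });
  }
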
@@ -150,8 +166,16 @@ int main(int argc, char **argv) {
mlir::initializeLLVMPasses();
+ registerCudaRunnerPasses();
+ PassPipelineCLParser passPipeline("", "Compiler passes to run");
+ registerPassManagerCLOptions();
+
+ auto mlirTransformer = [&](ModuleOp module) {
+ return runMLIRPasses(module, passPipeline);
+ };
+
mlir::JitRunnerConfig jitRunnerConfig;
- jitRunnerConfig.mlirTransformer = runMLIRPasses;
+ jitRunnerConfig.mlirTransformer = mlirTransformer;
mlir::DialectRegistry registry;
registry.insert<mlir::LLVM::LLVMDialect, mlir::NVVM::NVVMDialect,
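
Finally, a note on PassManager::Nesting::Implicit in runMLIRPasses above: with the default explicit nesting, an op-specific pass can only be added to a pass manager anchored on that op's type, so a flat pipeline string coming from the command line would be rejected as soon as it reaches, say, a gpu.module pass. Implicit nesting lets the pass manager introduce the required nest on demand. A sketch of the difference, assuming createLowerGpuOpsToNVVMOpsPass is anchored on gpu.module (as the gpu-to-cubin pipeline above indicates); not a complete program:

  #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
  #include "mlir/Dialect/GPU/GPUDialect.h"
  #include "mlir/Pass/PassManager.h"

  using namespace mlir;

  LogicalResult lowerKernels(ModuleOp module) {
    // Explicit nesting (the default): the gpu.module level must be spelled
    // out by hand, exactly as the old runMLIRPasses did.
    PassManager explicitPm(module.getContext());
    explicitPm.nest<gpu::GPUModuleOp>().addPass(
        createLowerGpuOpsToNVVMOpsPass());

    // Implicit nesting: the same pass can be added at module level; the
    // pass manager creates the gpu.module nest on demand, which is what
    // lets a flat, command-line-provided pipeline reach nested ops.
    PassManager implicitPm(module.getContext(), PassManager::Nesting::Implicit);
    implicitPm.addPass(createLowerGpuOpsToNVVMOpsPass());

    return implicitPm.run(module);
  }
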