[Mlir-commits] [mlir] 22a32f7 - [mlir][gpu] Add dump-ptx option
Guray Ozen
llvmlistbot at llvm.org
Thu Jul 13 12:15:02 PDT 2023
Author: Guray Ozen
Date: 2023-07-13T21:14:57+02:00
New Revision: 22a32f7d9cb1beed4c37eed6ebea30c60f4d4a3b
URL: https://github.com/llvm/llvm-project/commit/22a32f7d9cb1beed4c37eed6ebea30c60f4d4a3b
DIFF: https://github.com/llvm/llvm-project/commit/22a32f7d9cb1beed4c37eed6ebea30c60f4d4a3b.diff
LOG: [mlir][gpu] Add dump-ptx option
When targeting NVIDIA GPUs, seeing the generated PTX is important. Currently, we don't have a simple way to do it.
This work adds a dump-ptx option to the gpu-to-cubin pass. One can use it like `gpu-to-cubin{chip=sm_90 features=+ptx80 dump-ptx}`.
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D155166
Added:
mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
Modified:
mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
index d24d4d89e24935..1afbcb2128d490 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h
@@ -117,6 +117,9 @@ class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> {
*this, "gpu-binary-annotation",
llvm::cl::desc("Annotation attribute string for GPU binary"),
llvm::cl::init(getDefaultGpuBinaryAnnotation())};
+ Option<bool> dumpPtx{*this, "dump-ptx",
+ ::llvm::cl::desc("Dump generated PTX"),
+ llvm::cl::init(false)};
};
} // namespace gpu
@@ -137,7 +140,8 @@ void registerGpuSerializeToHsacoPass();
std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple,
StringRef chip,
StringRef features,
- int optLevel = 2);
+ int optLevel = 2,
+ bool dumpPtx = false);
/// Create an instance of the GPU kernel function to HSAco binary serialization
/// pass.
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
index 4df9e6d45470a9..172f006c7b6fb1 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "llvm/Support/Debug.h"
#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
#include "mlir/Pass/Pass.h"
@@ -50,7 +51,7 @@ class SerializeToCubinPass
SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
StringRef chip = "sm_35", StringRef features = "+ptx60",
- int optLevel = 2);
+ int optLevel = 2, bool dumpPtx = false);
StringRef getArgument() const override { return "gpu-to-cubin"; }
StringRef getDescription() const override {
@@ -73,10 +74,12 @@ static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
}
SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
- StringRef features, int optLevel) {
+ StringRef features, int optLevel,
+ bool dumpPtx) {
maybeSetOption(this->triple, triple);
maybeSetOption(this->chip, chip);
maybeSetOption(this->features, features);
+ this->dumpPtx = dumpPtx;
if (this->optLevel.getNumOccurrences() == 0)
this->optLevel.setValue(optLevel);
}
@@ -112,6 +115,10 @@ SerializeToCubinPass::serializeISA(const std::string &isa) {
&linkState));
auto kernelName = getOperation().getName().str();
+ if (dumpPtx) {
+ llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
+ llvm::dbgs() << isa << "\n";
+ }
RETURN_ON_CUDA_ERROR(cuLinkAddData(
linkState, CUjitInputType::CU_JIT_INPUT_PTX,
const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
@@ -151,9 +158,10 @@ void mlir::registerGpuSerializeToCubinPass() {
std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
StringRef arch,
StringRef features,
- int optLevel) {
+ int optLevel,
+ bool dumpPtx) {
return std::make_unique<SerializeToCubinPass>(triple, arch, features,
- optLevel);
+ optLevel, dumpPtx);
}
#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
new file mode 100644
index 00000000000000..2c1ae3ee840d0f
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir
@@ -0,0 +1,15 @@
+// RUN: mlir-opt %s \
+// RUN: | mlir-opt -gpu-kernel-outlining \
+// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{dump-ptx}))' \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: Generated by LLVM NVPTX Back-End
+// CHECK: .visible .func kernel_a()
+// CHECK: ret;
+
+gpu.module @bar {
+ llvm.func @kernel_a()
+ attributes { gpu.kernel } {
+ llvm.return
+ }
+}
More information about the Mlir-commits
mailing list