[Mlir-commits] [mlir] [mlir][gpu] `gpu-module-to-binary`: add option to dump intermediate files (PR #170016)
Ivan Butygin
llvmlistbot at llvm.org
Sun Nov 30 08:19:30 PST 2025
https://github.com/Hardcode84 updated https://github.com/llvm/llvm-project/pull/170016
>From 8ce37a2fc09cf08a047ad27ce245cb08b7d7b349 Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Sun, 30 Nov 2025 01:25:30 +0100
Subject: [PATCH 1/2] [mlir][gpu] `gpu-module-to-binary`: add option to dump
intermediate files
Add an option to specify a directory into which intermediate files are dumped during GPU binary generation, for debugging.
Also fix ROCDL lowering bug where callbacks weren't propagated.
---
.../mlir/Dialect/GPU/Transforms/Passes.td | 4 +-
.../Dialect/GPU/Transforms/ModuleToBinary.cpp | 63 ++++++++++++++++++-
mlir/lib/Target/LLVM/ModuleToObject.cpp | 6 +-
mlir/lib/Target/LLVM/ROCDL/Target.cpp | 10 ++-
.../module-to-binary-nvvm-intermediates.mlir | 17 +++++
.../module-to-binary-rocdl-intermediates.mlir | 17 +++++
6 files changed, 113 insertions(+), 4 deletions(-)
create mode 100644 mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir
create mode 100644 mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir
diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index 0c8a0c7a677ab..bfb407b3d7907 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -113,7 +113,9 @@ def GpuModuleToBinaryPass
Option<"compilationTarget", "format", "std::string", [{"fatbin"}],
"The target representation of the compilation process.">,
Option<"elfSection", "section", "std::string", [{""}],
- "ELF section where binary is to be located.">
+ "ELF section where binary is to be located.">,
+ Option<"dumpIntermediates", "dump-intermediates", "std::string", [{""}],
+ "Directory to dump intermediate artifacts (LLVM IR, device assembly).">
];
}
diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
index 95d5cadbd4e1a..c55cfa25c3482 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@@ -17,6 +17,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+#define DEBUG_TYPE "gpu-module-to-binary"
using namespace mlir;
using namespace mlir::gpu;
@@ -26,6 +32,27 @@ namespace mlir {
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir
+static void dumpToFile(StringRef dumpDir, const llvm::Twine &filename,
+ function_ref<void(llvm::raw_ostream &)> writeContent) {
+ if (dumpDir.empty())
+ return;
+
+ llvm::SmallString<128> path(dumpDir);
+ llvm::sys::path::append(path, filename);
+
+ std::error_code ec;
+ llvm::ToolOutputFile output(path, ec, llvm::sys::fs::OF_None);
+ if (ec) {
+ LLVM_DEBUG(llvm::dbgs() << "Failed to create file '" << path
+ << "': " << ec.message() << "\n");
+ return;
+ }
+
+ writeContent(output.os());
+ output.keep();
+ LLVM_DEBUG(llvm::dbgs() << "Dumped intermediate to: " << path << "\n");
+}
+
namespace {
class GpuModuleToBinaryPass
: public impl::GpuModuleToBinaryPassBase<GpuModuleToBinaryPass> {
@@ -64,8 +91,42 @@ void GpuModuleToBinaryPass::runOnOperation() {
SmallVector<Attribute> librariesToLink;
for (const std::string &path : linkFiles)
librariesToLink.push_back(StringAttr::get(&getContext(), path));
+
+ // Create dump directory if specified
+ if (!dumpIntermediates.empty()) {
+ if (std::error_code ec =
+ llvm::sys::fs::create_directories(dumpIntermediates)) {
+ getOperation()->emitError() << "Failed to create dump directory '"
+ << dumpIntermediates << "': " << ec.message();
+ return signalPassFailure();
+ }
+ }
+
+ // Create callbacks for dumping intermediate artifacts if requested
+ auto initialIRCallback = [&](llvm::Module &module) {
+ dumpToFile(dumpIntermediates, module.getName() + ".initial.ll",
+ [&](llvm::raw_ostream &os) { module.print(os, nullptr); });
+ };
+
+ auto linkedIRCallback = [&](llvm::Module &module) {
+ dumpToFile(dumpIntermediates, module.getName() + ".linked.ll",
+ [&](llvm::raw_ostream &os) { module.print(os, nullptr); });
+ };
+
+ auto optimizedIRCallback = [&](llvm::Module &module) {
+ dumpToFile(dumpIntermediates, module.getName() + ".opt.ll",
+ [&](llvm::raw_ostream &os) { module.print(os, nullptr); });
+ };
+
+ auto isaCallback = [&](StringRef isa) {
+ dumpToFile(dumpIntermediates, "kernel.isa",
+ [&](llvm::raw_ostream &os) { os << isa; });
+ };
+
TargetOptions targetOptions(toolkitPath, librariesToLink, cmdOptions,
- elfSection, *targetFormat, lazyTableBuilder);
+ elfSection, *targetFormat, lazyTableBuilder,
+ initialIRCallback, linkedIRCallback,
+ optimizedIRCallback, isaCallback);
if (failed(transformGpuModulesToBinaries(
getOperation(), OffloadingLLVMTranslationAttrInterface(nullptr),
targetOptions)))
diff --git a/mlir/lib/Target/LLVM/ModuleToObject.cpp b/mlir/lib/Target/LLVM/ModuleToObject.cpp
index 4098ccc548dc1..ddbf568bc6568 100644
--- a/mlir/lib/Target/LLVM/ModuleToObject.cpp
+++ b/mlir/lib/Target/LLVM/ModuleToObject.cpp
@@ -143,7 +143,11 @@ LogicalResult ModuleToObject::loadBitcodeFilesFromList(
std::unique_ptr<llvm::Module>
ModuleToObject::translateToLLVMIR(llvm::LLVMContext &llvmContext) {
- return translateModuleToLLVMIR(&getOperation(), llvmContext);
+ Operation &op = getOperation();
+ // Try to get nicer name from the operation.
+ auto nameAttr = op.getAttrOfType<StringAttr>("sym_name");
+ StringRef name = nameAttr ? nameAttr.getValue() : "LLVMDialectModule";
+ return translateModuleToLLVMIR(&op, llvmContext, name);
}
LogicalResult
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index f813f8db8fc94..6b3cbbddcea08 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -95,7 +95,11 @@ SerializeGPUModuleBase::SerializeGPUModuleBase(
Operation &module, ROCDLTargetAttr target,
const gpu::TargetOptions &targetOptions)
: ModuleToObject(module, target.getTriple(), target.getChip(),
- target.getFeatures(), target.getO()),
+ target.getFeatures(), target.getO(),
+ targetOptions.getInitialLlvmIRCallback(),
+ targetOptions.getLinkedLlvmIRCallback(),
+ targetOptions.getOptimizedLlvmIRCallback(),
+ targetOptions.getISACallback()),
target(target), toolkitPath(targetOptions.getToolkitPath()),
librariesToLink(targetOptions.getLibrariesToLink()) {
@@ -428,6 +432,10 @@ std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
getOperation().emitError() << "failed translating the module to ISA";
return std::nullopt;
}
+
+ if (isaCallback)
+ isaCallback(serializedISA.value());
+
#define DEBUG_TYPE "serialize-to-isa"
LLVM_DEBUG({
llvm::dbgs() << "ISA for module: "
diff --git a/mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir b/mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir
new file mode 100644
index 0000000000000..af3e42cf346bb
--- /dev/null
+++ b/mlir/test/Dialect/GPU/module-to-binary-nvvm-intermediates.mlir
@@ -0,0 +1,17 @@
+// REQUIRES: host-supports-nvptx
+// RUN: rm -rf %t || true
+// RUN: mlir-opt %s --gpu-module-to-binary='format=isa dump-intermediates=%t' | FileCheck %s
+// RUN: test -f %t/kernel_module.initial.ll
+// RUN: test -f %t/kernel_module.linked.ll
+// RUN: test -f %t/kernel_module.opt.ll
+// RUN: test -f %t/kernel.isa
+
+module attributes {gpu.container_module} {
+ // CHECK-LABEL: gpu.binary @kernel_module
+
+ gpu.module @kernel_module [#nvvm.target<chip = "sm_70">] {
+ llvm.func @kernel(%arg0: f32) {
+ llvm.return
+ }
+ }
+}
diff --git a/mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir b/mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir
new file mode 100644
index 0000000000000..ad5af5e9742e4
--- /dev/null
+++ b/mlir/test/Dialect/GPU/module-to-binary-rocdl-intermediates.mlir
@@ -0,0 +1,17 @@
+// REQUIRES: host-supports-amdgpu
+// RUN: rm -rf %t || true
+// RUN: mlir-opt %s --gpu-module-to-binary='format=isa dump-intermediates=%t' | FileCheck %s
+// RUN: test -f %t/kernel_module.initial.ll
+// RUN: test -f %t/kernel_module.linked.ll
+// RUN: test -f %t/kernel_module.opt.ll
+// RUN: test -f %t/kernel.isa
+
+module attributes {gpu.container_module} {
+ // CHECK-LABEL: gpu.binary @kernel_module
+
+ gpu.module @kernel_module [#rocdl.target<chip = "gfx942">] {
+ llvm.func @kernel(%arg0: f32) {
+ llvm.return
+ }
+ }
+}
>From b102b577d4e499b39ffa0774c56e61f6ab672ea9 Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Sun, 30 Nov 2025 17:10:53 +0100
Subject: [PATCH 2/2] review comments
---
.../Dialect/GPU/Transforms/ModuleToBinary.cpp | 30 +++++++++----------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
index c55cfa25c3482..0d6243458732c 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@@ -17,7 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -43,14 +43,14 @@ static void dumpToFile(StringRef dumpDir, const llvm::Twine &filename,
std::error_code ec;
llvm::ToolOutputFile output(path, ec, llvm::sys::fs::OF_None);
if (ec) {
- LLVM_DEBUG(llvm::dbgs() << "Failed to create file '" << path
- << "': " << ec.message() << "\n");
+ LDBG() << "Failed to create file '" << path << "': " << ec.message()
+ << "\n";
return;
}
writeContent(output.os());
output.keep();
- LLVM_DEBUG(llvm::dbgs() << "Dumped intermediate to: " << path << "\n");
+ LDBG() << "Dumped intermediate to: " << path << "\n";
}
namespace {
@@ -92,7 +92,7 @@ void GpuModuleToBinaryPass::runOnOperation() {
for (const std::string &path : linkFiles)
librariesToLink.push_back(StringAttr::get(&getContext(), path));
- // Create dump directory if specified
+ // Create dump directory if specified.
if (!dumpIntermediates.empty()) {
if (std::error_code ec =
llvm::sys::fs::create_directories(dumpIntermediates)) {
@@ -102,20 +102,20 @@ void GpuModuleToBinaryPass::runOnOperation() {
}
}
- // Create callbacks for dumping intermediate artifacts if requested
- auto initialIRCallback = [&](llvm::Module &module) {
- dumpToFile(dumpIntermediates, module.getName() + ".initial.ll",
- [&](llvm::raw_ostream &os) { module.print(os, nullptr); });
+ // Create callbacks for dumping intermediate artifacts if requested.
+ auto initialIRCallback = [&](llvm::Module &mod) {
+ dumpToFile(dumpIntermediates, mod.getName() + ".initial.ll",
+ [&](llvm::raw_ostream &os) { mod.print(os, nullptr); });
};
- auto linkedIRCallback = [&](llvm::Module &module) {
- dumpToFile(dumpIntermediates, module.getName() + ".linked.ll",
- [&](llvm::raw_ostream &os) { module.print(os, nullptr); });
+ auto linkedIRCallback = [&](llvm::Module &mod) {
+ dumpToFile(dumpIntermediates, mod.getName() + ".linked.ll",
+ [&](llvm::raw_ostream &os) { mod.print(os, nullptr); });
};
- auto optimizedIRCallback = [&](llvm::Module &module) {
- dumpToFile(dumpIntermediates, module.getName() + ".opt.ll",
- [&](llvm::raw_ostream &os) { module.print(os, nullptr); });
+ auto optimizedIRCallback = [&](llvm::Module &mod) {
+ dumpToFile(dumpIntermediates, mod.getName() + ".opt.ll",
+ [&](llvm::raw_ostream &os) { mod.print(os, nullptr); });
};
auto isaCallback = [&](StringRef isa) {
More information about the Mlir-commits
mailing list