[Mlir-commits] [mlir] [mlir][target][nvvm] Perf by stage and store into properties (PR #126178)
Zichen Lu
llvmlistbot at llvm.org
Thu Feb 6 21:35:56 PST 2025
https://github.com/MikaOvO created https://github.com/llvm/llvm-project/pull/126178
Measure the time spent in each serialization stage (LLVM IR -> ISA, ISA -> binary).
The results are stored in the object properties, so users can read them after running the GpuModuleToBinary pass.
>From 0cbfd61857ef10ba831f112029bcc3adbfb54cda Mon Sep 17 00:00:00 2001
From: Zichen Lu <mikaovo2000 at gmail.com>
Date: Fri, 7 Feb 2025 13:32:53 +0800
Subject: [PATCH] [mlir][target][nvvm] Perf by stage and store into properties
---
mlir/lib/Target/LLVM/NVVM/Target.cpp | 58 +++++++++++++++++--
.../Target/LLVM/SerializeNVVMTarget.cpp | 32 ++++++++++
2 files changed, 86 insertions(+), 4 deletions(-)
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index b7d60ed59db02d1..d5ef9f94c749afe 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -51,6 +51,11 @@ extern "C" const unsigned char _mlir_embedded_libdevice[];
extern "C" const unsigned _mlir_embedded_libdevice_size;
namespace {
+// Elapsed milliseconds per `moduleToObject` stage; 0 until a run records them.
+struct ModuleToObjectPerfResult {
+ int64_t llvmIRToPTX = 0; // LLVM IR -> PTX (ISA) translation.
+ int64_t ptxToBinary = 0; // PTX (ISA) -> binary compilation.
+};
// Implementation of the `TargetAttrInterface` model.
class NVVMTargetAttrImpl
: public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
@@ -220,6 +225,10 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
std::optional<SmallVector<char, 0>>
moduleToObject(llvm::Module &llvmModule) override;
+ /// Returns the per-stage timings recorded by the last `moduleToObject` call.
+ /// Call `moduleToObject` first; until then no timings have been recorded.
+ ModuleToObjectPerfResult getModuleToObjectPerfResult();
+
private:
using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
@@ -235,6 +244,9 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
/// Target options.
gpu::TargetOptions targetOptions;
+
+ /// ModuleToObject performance result.
+ ModuleToObjectPerfResult moduleToObjectPerfResult;
};
} // namespace
@@ -256,6 +268,10 @@ NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
return TmpFile(filename, llvm::FileRemover(filename.c_str()));
}
+ModuleToObjectPerfResult NVPTXSerializer::getModuleToObjectPerfResult() {
+ return moduleToObjectPerfResult; // By-value copy of the stored timings.
+}
+
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
@@ -618,6 +634,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
+ std::chrono::high_resolution_clock::time_point llvmPoint =
+ std::chrono::high_resolution_clock::now();
// Return LLVM IR if the compilation target is `offload`.
#define DEBUG_TYPE "serialize-to-llvm"
LLVM_DEBUG({
@@ -650,6 +668,8 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
getOperation().emitError() << "Failed translating the module to ISA.";
return std::nullopt;
}
+ std::chrono::high_resolution_clock::time_point ptxPoint =
+ std::chrono::high_resolution_clock::now();
if (isaCallback)
isaCallback(serializedISA.value());
@@ -669,17 +689,31 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
return SmallVector<char, 0>(bin.begin(), bin.end());
}
+ std::optional<SmallVector<char, 0>> result;
// Compile to binary.
#if MLIR_ENABLE_NVPTXCOMPILER
- return compileToBinaryNVPTX(*serializedISA);
+ result = compileToBinaryNVPTX(*serializedISA);
#else
- return compileToBinary(*serializedISA);
+ result = compileToBinary(*serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER
+
+ std::chrono::high_resolution_clock::time_point binaryPoint =
+ std::chrono::high_resolution_clock::now();
+
+ moduleToObjectPerfResult = {
+ std::chrono::duration_cast<std::chrono::milliseconds>(ptxPoint -
+ llvmPoint)
+ .count(),
+ std::chrono::duration_cast<std::chrono::milliseconds>(binaryPoint -
+ ptxPoint)
+ .count()};
+ return result;
}
std::optional<SmallVector<char, 0>>
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
const gpu::TargetOptions &options) const {
+ Builder builder(attribute.getContext());
assert(module && "The module must be non null.");
if (!module)
return std::nullopt;
@@ -689,7 +723,15 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
}
NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
serializer.init();
- return serializer.run();
+ std::optional<SmallVector<char, 0>> result = serializer.run();
+ auto moduleToObjectPerfResult = serializer.getModuleToObjectPerfResult();
+ module->setAttr(
+ "LLVMIRToPTXTimeCost",
+ builder.getI64IntegerAttr(moduleToObjectPerfResult.llvmIRToPTX));
+ module->setAttr(
+ "PTXToBinaryTimeCost",
+ builder.getI64IntegerAttr(moduleToObjectPerfResult.ptxToBinary));
+ return result;
}
Attribute
@@ -700,7 +742,7 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
gpu::CompilationTarget format = options.getCompilationTarget();
DictionaryAttr objectProps;
Builder builder(attribute.getContext());
- SmallVector<NamedAttribute, 2> properties;
+ SmallVector<NamedAttribute, 4> properties;
if (format == gpu::CompilationTarget::Assembly)
properties.push_back(
builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
@@ -709,6 +751,14 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
builder.getStringAttr(section)));
+ for (const auto *perfName : {"LLVMIRToPTXTimeCost", "PTXToBinaryTimeCost"}) {
+ if (module->hasAttr(perfName)) {
+ IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
+ properties.push_back(builder.getNamedAttr(
+ perfName, builder.getI64IntegerAttr(attr.getInt())));
+ }
+ }
+
if (!properties.empty())
objectProps = builder.getDictionaryAttr(properties);
diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
index eabfd1c4d32eb03..72d143079a5e450 100644
--- a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -296,3 +296,35 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
ASSERT_TRUE(!object->empty());
}
}
+
+// Test that timing attrs are set on the GPU module and copied to object props.
+TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(Stage)) {
+ MLIRContext context(registry);
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
+
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+
+ gpu::TargetOptions options({}, {}, {}, {}, gpu::CompilationTarget::Assembly);
+
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_TRUE(!object->empty());
+ ASSERT_TRUE(gpuModule->hasAttr("LLVMIRToPTXTimeCost"));
+ ASSERT_TRUE(gpuModule->hasAttr("PTXToBinaryTimeCost"));
+
+ Attribute attr = serializer.createObject(gpuModule, *object, options);
+ ASSERT_TRUE(!!attr);
+ auto objectAttr = cast<gpu::ObjectAttr>(attr);
+ auto props = objectAttr.getProperties();
+ ASSERT_TRUE(!!props.get("LLVMIRToPTXTimeCost"));
+ ASSERT_TRUE(!!props.get("PTXToBinaryTimeCost"));
+ }
+}
More information about the Mlir-commits
mailing list