[Mlir-commits] [mlir] [mlir][target][nvvm] Perf by stage and store into properties (PR #126178)
Zichen Lu
llvmlistbot at llvm.org
Fri Feb 7 01:59:06 PST 2025
https://github.com/MikaOvO updated https://github.com/llvm/llvm-project/pull/126178
From ed598152c8daf7dcc0841d8ae64e9281a20d8a28 Mon Sep 17 00:00:00 2001
From: Zichen Lu <mikaovo2000 at gmail.com>
Date: Fri, 7 Feb 2025 16:23:01 +0800
Subject: [PATCH] [mlir][target][nvvm] Perf by stage and store into properties
---
mlir/lib/Target/LLVM/NVVM/Target.cpp | 69 +++++++++++++++++--
.../Dialect/GPU/module-to-binary-nvvm.mlir | 2 +-
2 files changed, 65 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index b7d60ed59db02d1..af90bf00e081a6f 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -39,6 +39,7 @@
#include <cstdint>
#include <cstdlib>
+#include <optional>
using namespace mlir;
using namespace mlir::NVVM;
@@ -220,6 +221,16 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
std::optional<SmallVector<char, 0>>
moduleToObject(llvm::Module &llvmModule) override;
+ /// Get LLVMIR->ISA performance result.
+ /// Return nullopt if moduleToObject has not been called or the target format
+ /// is LLVMIR.
+ std::optional<int64_t> getLLVMIRToISAPerfResult();
+
+ /// Get ISA->Binary performance result.
+ /// Return nullopt if moduleToObject has not been called or the target format
+ /// is LLVMIR or ISA.
+ std::optional<int64_t> getISAToBinaryPerfResult();
+
private:
using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
@@ -235,13 +246,20 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
/// Target options.
gpu::TargetOptions targetOptions;
+
+ /// LLVMIR->ISA perf result.
+ std::optional<int64_t> llvmToISAPerfResult;
+
+ /// ISA->Binary perf result.
+ std::optional<int64_t> isaToBinaryPerfResult;
};
} // namespace
NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
const gpu::TargetOptions &targetOptions)
: SerializeGPUModuleBase(module, target, targetOptions),
- targetOptions(targetOptions) {}
+ targetOptions(targetOptions), llvmToISAPerfResult(std::nullopt),
+ isaToBinaryPerfResult(std::nullopt) {}
std::optional<NVPTXSerializer::TmpFile>
NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
@@ -256,6 +274,14 @@ NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
return TmpFile(filename, llvm::FileRemover(filename.c_str()));
}
+std::optional<int64_t> NVPTXSerializer::getLLVMIRToISAPerfResult() {
+ return llvmToISAPerfResult;
+}
+
+std::optional<int64_t> NVPTXSerializer::getISAToBinaryPerfResult() {
+ return isaToBinaryPerfResult;
+}
+
gpu::GPUModuleOp NVPTXSerializer::getOperation() {
return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
}
@@ -618,6 +644,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
std::optional<SmallVector<char, 0>>
NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
+ std::chrono::high_resolution_clock::time_point llvmPoint =
+ std::chrono::high_resolution_clock::now();
// Return LLVM IR if the compilation target is `offload`.
#define DEBUG_TYPE "serialize-to-llvm"
LLVM_DEBUG({
@@ -650,6 +678,11 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
getOperation().emitError() << "Failed translating the module to ISA.";
return std::nullopt;
}
+ std::chrono::high_resolution_clock::time_point ptxPoint =
+ std::chrono::high_resolution_clock::now();
+ llvmToISAPerfResult = std::chrono::duration_cast<std::chrono::milliseconds>(
+ ptxPoint - llvmPoint)
+ .count();
if (isaCallback)
isaCallback(serializedISA.value());
@@ -669,17 +702,26 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
return SmallVector<char, 0>(bin.begin(), bin.end());
}
+ std::optional<SmallVector<char, 0>> result;
// Compile to binary.
#if MLIR_ENABLE_NVPTXCOMPILER
- return compileToBinaryNVPTX(*serializedISA);
+ result = compileToBinaryNVPTX(*serializedISA);
#else
- return compileToBinary(*serializedISA);
+ result = compileToBinary(*serializedISA);
#endif // MLIR_ENABLE_NVPTXCOMPILER
+
+ std::chrono::high_resolution_clock::time_point binaryPoint =
+ std::chrono::high_resolution_clock::now();
+ isaToBinaryPerfResult = std::chrono::duration_cast<std::chrono::milliseconds>(
+ binaryPoint - ptxPoint)
+ .count();
+ return result;
}
std::optional<SmallVector<char, 0>>
NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
const gpu::TargetOptions &options) const {
+ Builder builder(attribute.getContext());
assert(module && "The module must be non null.");
if (!module)
return std::nullopt;
@@ -689,7 +731,16 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
}
NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
serializer.init();
- return serializer.run();
+ std::optional<SmallVector<char, 0>> result = serializer.run();
+ auto llvmToISAPerfResult = serializer.getLLVMIRToISAPerfResult();
+ if (llvmToISAPerfResult.has_value())
+ module->setAttr("LLVMIRToPTXTimeCost",
+ builder.getI64IntegerAttr(*llvmToISAPerfResult));
+ auto isaToBinaryPerfResult = serializer.getISAToBinaryPerfResult();
+ if (isaToBinaryPerfResult.has_value())
+ module->setAttr("PTXToBinaryTimeCost",
+ builder.getI64IntegerAttr(*isaToBinaryPerfResult));
+ return result;
}
Attribute
@@ -700,7 +751,7 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
gpu::CompilationTarget format = options.getCompilationTarget();
DictionaryAttr objectProps;
Builder builder(attribute.getContext());
- SmallVector<NamedAttribute, 2> properties;
+ SmallVector<NamedAttribute, 4> properties;
if (format == gpu::CompilationTarget::Assembly)
properties.push_back(
builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
@@ -709,6 +760,14 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
builder.getStringAttr(section)));
+ for (const auto *perfName : {"LLVMIRToPTXTimeCost", "PTXToBinaryTimeCost"}) {
+ if (module->hasAttr(perfName)) {
+ IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
+ properties.push_back(builder.getNamedAttr(
+ perfName, builder.getI64IntegerAttr(attr.getInt())));
+ }
+ }
+
if (!properties.empty())
objectProps = builder.getDictionaryAttr(properties);
diff --git a/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir b/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
index e6284ccf94b505e..c7d94091779b12a 100644
--- a/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
+++ b/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
@@ -16,7 +16,7 @@ module attributes {gpu.container_module} {
}
// CHECK-LABEL:gpu.binary @kernel_module2
- // CHECK-ISA:[#gpu.object<#nvvm.target<flags = {fast}>, properties = {O = 2 : i32}, assembly = "{{.*}}">, #gpu.object<#nvvm.target, properties = {O = 2 : i32}, assembly = "{{.*}}">]
+ // CHECK-ISA:[#gpu.object<#nvvm.target<flags = {fast}>, properties = {LLVMIRToPTXTimeCost = {{[0-9]+}} : i64, O = 2 : i32}, assembly = "{{.*}}">, #gpu.object<#nvvm.target, properties = {LLVMIRToPTXTimeCost = {{[0-9]+}} : i64, O = 2 : i32}, assembly = "{{.*}}">]
gpu.module @kernel_module2 [#nvvm.target<flags = {fast}>, #nvvm.target] {
llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
%arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
More information about the Mlir-commits
mailing list