[Mlir-commits] [mlir] [mlir][target][nvvm] Perf by stage and store into properties (PR #126178)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Feb 6 21:36:29 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mlir

Author: Zichen Lu (MikaOvO)

<details>
<summary>Changes</summary>

Implement the feature about perf by stage(llvm-ir -> isa, isa->binary).

The results will be stored into the properties, then users can use them after using GpuModuleToBinary Pass.

---
Full diff: https://github.com/llvm/llvm-project/pull/126178.diff


2 Files Affected:

- (modified) mlir/lib/Target/LLVM/NVVM/Target.cpp (+54-4) 
- (modified) mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp (+32) 


``````````diff
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index b7d60ed59db02d1..d5ef9f94c749afe 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -51,6 +51,11 @@ extern "C" const unsigned char _mlir_embedded_libdevice[];
 extern "C" const unsigned _mlir_embedded_libdevice_size;
 
 namespace {
+// Model that contains performance results by stage.
+struct ModuleToObjectPerfResult {
+  int64_t llvmIRToPTX;
+  int64_t ptxToBinary;
+};
 // Implementation of the `TargetAttrInterface` model.
 class NVVMTargetAttrImpl
     : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
@@ -220,6 +225,10 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
   std::optional<SmallVector<char, 0>>
   moduleToObject(llvm::Module &llvmModule) override;
 
+  /// Get ModuleToObject function performance result.
+  /// Should call ModuleToObject function first.
+  ModuleToObjectPerfResult getModuleToObjectPerfResult();
+
 private:
   using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
 
@@ -235,6 +244,9 @@ class NVPTXSerializer : public SerializeGPUModuleBase {
 
   /// Target options.
   gpu::TargetOptions targetOptions;
+
+  /// ModuleToObject performance result.
+  ModuleToObjectPerfResult moduleToObjectPerfResult;
 };
 } // namespace
 
@@ -256,6 +268,10 @@ NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
   return TmpFile(filename, llvm::FileRemover(filename.c_str()));
 }
 
+ModuleToObjectPerfResult NVPTXSerializer::getModuleToObjectPerfResult() {
+  return moduleToObjectPerfResult;
+}
+
 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
   return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
 }
@@ -618,6 +634,8 @@ NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
 
 std::optional<SmallVector<char, 0>>
 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
+  std::chrono::high_resolution_clock::time_point llvmPoint =
+      std::chrono::high_resolution_clock::now();
   // Return LLVM IR if the compilation target is `offload`.
 #define DEBUG_TYPE "serialize-to-llvm"
   LLVM_DEBUG({
@@ -650,6 +668,8 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
     getOperation().emitError() << "Failed translating the module to ISA.";
     return std::nullopt;
   }
+  std::chrono::high_resolution_clock::time_point ptxPoint =
+      std::chrono::high_resolution_clock::now();
   if (isaCallback)
     isaCallback(serializedISA.value());
 
@@ -669,17 +689,31 @@ NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
     return SmallVector<char, 0>(bin.begin(), bin.end());
   }
 
+  std::optional<SmallVector<char, 0>> result;
   // Compile to binary.
 #if MLIR_ENABLE_NVPTXCOMPILER
-  return compileToBinaryNVPTX(*serializedISA);
+  result = compileToBinaryNVPTX(*serializedISA);
 #else
-  return compileToBinary(*serializedISA);
+  result = compileToBinary(*serializedISA);
 #endif // MLIR_ENABLE_NVPTXCOMPILER
+
+  std::chrono::high_resolution_clock::time_point binaryPoint =
+      std::chrono::high_resolution_clock::now();
+
+  moduleToObjectPerfResult = {
+      std::chrono::duration_cast<std::chrono::milliseconds>(ptxPoint -
+                                                            llvmPoint)
+          .count(),
+      std::chrono::duration_cast<std::chrono::milliseconds>(binaryPoint -
+                                                            ptxPoint)
+          .count()};
+  return result;
 }
 
 std::optional<SmallVector<char, 0>>
 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
                                       const gpu::TargetOptions &options) const {
+  Builder builder(attribute.getContext());
   assert(module && "The module must be non null.");
   if (!module)
     return std::nullopt;
@@ -689,7 +723,15 @@ NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
   }
   NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
   serializer.init();
-  return serializer.run();
+  std::optional<SmallVector<char, 0>> result = serializer.run();
+  auto moduleToObjectPerfResult = serializer.getModuleToObjectPerfResult();
+  module->setAttr(
+      "LLVMIRToPTXTimeCost",
+      builder.getI64IntegerAttr(moduleToObjectPerfResult.llvmIRToPTX));
+  module->setAttr(
+      "PTXToBinaryTimeCost",
+      builder.getI64IntegerAttr(moduleToObjectPerfResult.ptxToBinary));
+  return result;
 }
 
 Attribute
@@ -700,7 +742,7 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
   gpu::CompilationTarget format = options.getCompilationTarget();
   DictionaryAttr objectProps;
   Builder builder(attribute.getContext());
-  SmallVector<NamedAttribute, 2> properties;
+  SmallVector<NamedAttribute, 4> properties;
   if (format == gpu::CompilationTarget::Assembly)
     properties.push_back(
         builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
@@ -709,6 +751,14 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
     properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
                                               builder.getStringAttr(section)));
 
+  for (const auto *perfName : {"LLVMIRToPTXTimeCost", "PTXToBinaryTimeCost"}) {
+    if (module->hasAttr(perfName)) {
+      IntegerAttr attr = llvm::dyn_cast<IntegerAttr>(module->getAttr(perfName));
+      properties.push_back(builder.getNamedAttr(
+          perfName, builder.getI64IntegerAttr(attr.getInt())));
+    }
+  }
+
   if (!properties.empty())
     objectProps = builder.getDictionaryAttr(properties);
 
diff --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
index eabfd1c4d32eb03..72d143079a5e450 100644
--- a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -296,3 +296,35 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(LinkedLLVMIRResource)) {
     ASSERT_TRUE(!object->empty());
   }
 }
+
+// Test performance results are injected into module.
+TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(Stage)) {
+  MLIRContext context(registry);
+
+  OwningOpRef<ModuleOp> module =
+      parseSourceString<ModuleOp>(moduleStr, &context);
+  ASSERT_TRUE(!!module);
+
+  NVVM::NVVMTargetAttr target = NVVM::NVVMTargetAttr::get(&context);
+
+  auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+  ASSERT_TRUE(!!serializer);
+
+  gpu::TargetOptions options({}, {}, {}, {}, gpu::CompilationTarget::Assembly);
+
+  for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+    std::optional<SmallVector<char, 0>> object =
+        serializer.serializeToObject(gpuModule, options);
+    ASSERT_TRUE(object != std::nullopt);
+    ASSERT_TRUE(!object->empty());
+    ASSERT_TRUE(gpuModule->hasAttr("LLVMIRToPTXTimeCost"));
+    ASSERT_TRUE(gpuModule->hasAttr("PTXToBinaryTimeCost"));
+
+    Attribute attr = serializer.createObject(gpuModule, *object, options);
+    ASSERT_TRUE(!!attr);
+    auto objectAttr = cast<gpu::ObjectAttr>(attr);
+    auto props = objectAttr.getProperties();
+    ASSERT_TRUE(!!props.get("LLVMIRToPTXTimeCost"));
+    ASSERT_TRUE(!!props.get("PTXToBinaryTimeCost"));
+  }
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/126178


More information about the Mlir-commits mailing list