[Mlir-commits] [mlir] 9919295 - [mlir][gpu] Adding ELF section option to the gpu-module-to-binary pass (#119440)

Mon Dec 16 09:09:45 PST 2024

Author: Renaud Kauffmann
Date: 2024-12-16T09:09:41-08:00
New Revision: 9919295cfd05222159246d7448ec42392e98fbf2

URL: https://github.com/llvm/llvm-project/commit/9919295cfd05222159246d7448ec42392e98fbf2
DIFF: https://github.com/llvm/llvm-project/commit/9919295cfd05222159246d7448ec42392e98fbf2.diff

LOG: [mlir][gpu] Adding ELF section option to the gpu-module-to-binary pass (#119440)

This is a follow-up of #117246.

At the time, I thought it would be easy to edit a DictionaryAttr, but it
turns out that these attributes are immutable and need to be passed during
the construction of the gpu.binary Op.

The first commit used the NVVMTargetAttr to pass the information.
After feedback from @fabianmcg, this PR now passes the information
through a new option of the gpu-module-to-binary pass.

Please add reviewers, as you see fit.

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
    mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
    mlir/lib/Target/LLVM/NVVM/Target.cpp
    mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
    mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
    mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
    mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
    mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
index d4b16a1de8eddc..5b6e03a2e6e75e 100644

--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationInterfaces.h
@@ -27,6 +27,7 @@ class ModuleTranslation;
 }
 namespace gpu {
 enum class CompilationTarget : uint32_t;
+constexpr StringLiteral elfSectionName = "section";
 
 /// This class indicates that the attribute associated with this trait is a GPU
 /// offloading translation attribute. These kinds of attributes must implement
@@ -51,7 +52,7 @@ class TargetOptions {
   /// `Fatbin`.
   TargetOptions(
       StringRef toolkitPath = {}, ArrayRef<std::string> linkFiles = {},
-      StringRef cmdOptions = {},
+      StringRef cmdOptions = {}, StringRef elfSection = {},
       CompilationTarget compilationTarget = getDefaultCompilationTarget(),
       function_ref<SymbolTable *()> getSymbolTableCallback = {},
       function_ref<void(llvm::Module &)> initialLlvmIRCallback = {},
@@ -71,6 +72,9 @@ class TargetOptions {
   /// Returns the command line options.
   StringRef getCmdOptions() const;
 
+  /// Returns the ELF section.
+  StringRef getELFSection() const;
+
   /// Returns a tokenization of the command line options.
   std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>>
   tokenizeCmdOptions() const;
@@ -110,6 +114,7 @@ class TargetOptions {
   TargetOptions(
       TypeID typeID, StringRef toolkitPath = {},
       ArrayRef<std::string> linkFiles = {}, StringRef cmdOptions = {},
+      StringRef elfSection = {},
       CompilationTarget compilationTarget = getDefaultCompilationTarget(),
       function_ref<SymbolTable *()> getSymbolTableCallback = {},
       function_ref<void(llvm::Module &)> initialLlvmIRCallback = {},
@@ -127,6 +132,9 @@ class TargetOptions {
   /// process.
   std::string cmdOptions;
 
+  /// ELF Section where the binary needs to be located
+  std::string elfSection;
+
   /// Compilation process target format.
   CompilationTarget compilationTarget;
 

diff  --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index 4a9ddafdd177d2..e055164a1c384e 100644
--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -95,7 +95,9 @@ def GpuModuleToBinaryPass
     Option<"cmdOptions", "opts", "std::string", [{""}],
            "Command line options to pass to the tools.">,
     Option<"compilationTarget", "format", "std::string", [{"fatbin"}],
-           "The target representation of the compilation process.">
+           "The target representation of the compilation process.">,
+    Option<"elfSection", "section", "std::string", [{""}],
+           "ELF section where binary is to be located.">
   ];
 }
 

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index ee00fbeb28b61d..1fad251b2f79e0 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2484,27 +2484,31 @@ KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const {
 
 TargetOptions::TargetOptions(
     StringRef toolkitPath, ArrayRef<std::string> linkFiles,
-    StringRef cmdOptions, CompilationTarget compilationTarget,
+    StringRef cmdOptions, StringRef elfSection,
+    CompilationTarget compilationTarget,
     function_ref<SymbolTable *()> getSymbolTableCallback,
     function_ref<void(llvm::Module &)> initialLlvmIRCallback,
     function_ref<void(llvm::Module &)> linkedLlvmIRCallback,
     function_ref<void(llvm::Module &)> optimizedLlvmIRCallback,
     function_ref<void(StringRef)> isaCallback)
     : TargetOptions(TypeID::get<TargetOptions>(), toolkitPath, linkFiles,
-                    cmdOptions, compilationTarget, getSymbolTableCallback,
-                    initialLlvmIRCallback, linkedLlvmIRCallback,
-                    optimizedLlvmIRCallback, isaCallback) {}
+                    cmdOptions, elfSection, compilationTarget,
+                    getSymbolTableCallback, initialLlvmIRCallback,
+                    linkedLlvmIRCallback, optimizedLlvmIRCallback,
+                    isaCallback) {}
 
 TargetOptions::TargetOptions(
     TypeID typeID, StringRef toolkitPath, ArrayRef<std::string> linkFiles,
-    StringRef cmdOptions, CompilationTarget compilationTarget,
+    StringRef cmdOptions, StringRef elfSection,
+    CompilationTarget compilationTarget,
     function_ref<SymbolTable *()> getSymbolTableCallback,
     function_ref<void(llvm::Module &)> initialLlvmIRCallback,
     function_ref<void(llvm::Module &)> linkedLlvmIRCallback,
     function_ref<void(llvm::Module &)> optimizedLlvmIRCallback,
     function_ref<void(StringRef)> isaCallback)
     : toolkitPath(toolkitPath.str()), linkFiles(linkFiles),
-      cmdOptions(cmdOptions.str()), compilationTarget(compilationTarget),
+      cmdOptions(cmdOptions.str()), elfSection(elfSection.str()),
+      compilationTarget(compilationTarget),
       getSymbolTableCallback(getSymbolTableCallback),
       initialLlvmIRCallback(initialLlvmIRCallback),
       linkedLlvmIRCallback(linkedLlvmIRCallback),
@@ -2519,6 +2523,8 @@ ArrayRef<std::string> TargetOptions::getLinkFiles() const { return linkFiles; }
 
 StringRef TargetOptions::getCmdOptions() const { return cmdOptions; }
 
+StringRef TargetOptions::getELFSection() const { return elfSection; }
+
 SymbolTable *TargetOptions::getSymbolTable() const {
   return getSymbolTableCallback ? getSymbolTableCallback() : nullptr;
 }

diff  --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
index 86a3b4780e88ce..295ece4782fdbf 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@@ -69,8 +69,8 @@ void GpuModuleToBinaryPass::runOnOperation() {
     return &parentTable.value();
   };
 
-  TargetOptions targetOptions(toolkitPath, linkFiles, cmdOptions, *targetFormat,
-                              lazyTableBuilder);
+  TargetOptions targetOptions(toolkitPath, linkFiles, cmdOptions, elfSection,
+                              *targetFormat, lazyTableBuilder);
   if (failed(transformGpuModulesToBinaries(
           getOperation(), OffloadingLLVMTranslationAttrInterface(nullptr),
           targetOptions)))

diff  --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index 3c92359915ded4..745c1a5a6ee601 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -13,6 +13,7 @@
 
 #include "mlir/Target/LLVM/NVVM/Target.h"
 
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Target/LLVM/NVVM/Utils.h"
@@ -664,9 +665,18 @@ NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
   gpu::CompilationTarget format = options.getCompilationTarget();
   DictionaryAttr objectProps;
   Builder builder(attribute.getContext());
+  SmallVector<NamedAttribute, 2> properties;
   if (format == gpu::CompilationTarget::Assembly)
-    objectProps = builder.getDictionaryAttr(
-        {builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO()))});
+    properties.push_back(
+        builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO())));
+
+  if (StringRef section = options.getELFSection(); !section.empty())
+    properties.push_back(builder.getNamedAttr(gpu::elfSectionName,
+                                              builder.getStringAttr(section)));
+
+  if (!properties.empty())
+    objectProps = builder.getDictionaryAttr(properties);
+
   return builder.getAttr<gpu::ObjectAttr>(
       attribute, format,
       builder.getStringAttr(StringRef(object.data(), object.size())),

diff  --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
index 3aeadeb283ff69..ade239c526af86 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/SelectObjectAttr.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 
 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
@@ -124,7 +125,7 @@ LogicalResult SelectObjectAttrImpl::embedBinary(
 
   if (object.getProperties()) {
     if (auto section = mlir::dyn_cast_or_null<mlir::StringAttr>(
-            object.getProperties().get("section"))) {
+            object.getProperties().get(gpu::elfSectionName))) {
       serializedObj->setSection(section.getValue());
     }
   }

diff  --git a/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir b/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
index c286c8bc9042ff..e6284ccf94b505 100644
--- a/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
+++ b/mlir/test/Dialect/GPU/module-to-binary-nvvm.mlir
@@ -1,10 +1,12 @@
 // REQUIRES: host-supports-nvptx
 // RUN: mlir-opt %s --gpu-module-to-binary="format=llvm" | FileCheck %s
 // RUN: mlir-opt %s --gpu-module-to-binary="format=isa" | FileCheck %s -check-prefix=CHECK-ISA
+// RUN: mlir-opt %s --gpu-module-to-binary="format=llvm section=__fatbin" | FileCheck %s -check-prefix=CHECK-SECTION
 
 module attributes {gpu.container_module} {
   // CHECK-LABEL:gpu.binary @kernel_module1
   // CHECK:[#gpu.object<#nvvm.target<chip = "sm_70">, offload = "{{.*}}">]
+  // CHECK-SECTION: #gpu.object<#nvvm.target<chip = "sm_70">, properties = {section = "__fatbin"}
   gpu.module @kernel_module1 [#nvvm.target<chip = "sm_70">] {
     llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
         %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
@@ -25,6 +27,7 @@ module attributes {gpu.container_module} {
 
   // CHECK-LABEL:gpu.binary @kernel_module3 <#gpu.select_object<1 : i64>>
   // CHECK:[#gpu.object<#nvvm.target<chip = "sm_70">, offload = "{{.*}}">, #gpu.object<#nvvm.target<chip = "sm_80">, offload = "{{.*}}">]
+  // CHECK-SECTION: [#gpu.object<#nvvm.target<chip = "sm_70">, properties = {section = "__fatbin"},{{.*}} #gpu.object<#nvvm.target<chip = "sm_80">, properties = {section = "__fatbin"}
   gpu.module @kernel_module3 <#gpu.select_object<1>> [
       #nvvm.target<chip = "sm_70">,
       #nvvm.target<chip = "sm_80">] {

diff  --git a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
index eee9bd5f234751..a92ad18c956821 100644
--- a/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeNVVMTarget.cpp
@@ -81,7 +81,7 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMMToLLVM)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Offload);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -117,7 +117,7 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToPTX)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Assembly);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -147,7 +147,7 @@ TEST_F(MLIRTargetLLVMNVVM, SKIP_WITHOUT_NVPTX(SerializeNVVMToBinary)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -194,9 +194,9 @@ TEST_F(MLIRTargetLLVMNVVM,
     isaResult = isa.str();
   };
 
-  gpu::TargetOptions options({}, {}, {}, gpu::CompilationTarget::Assembly, {},
-                             initialCallback, linkedCallback, optimizedCallback,
-                             isaCallback);
+  gpu::TargetOptions options({}, {}, {}, {}, gpu::CompilationTarget::Assembly,
+                             {}, initialCallback, linkedCallback,
+                             optimizedCallback, isaCallback);
 
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =

diff  --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
index b02f34c812b3f4..d5e72a17131748 100644
--- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@@ -83,7 +83,7 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToLLVM)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Offload);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Offload);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -119,7 +119,7 @@ TEST_F(MLIRTargetLLVMROCDL,
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Assembly);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -145,7 +145,7 @@ TEST_F(MLIRTargetLLVMROCDL,
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Assembly);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -169,7 +169,7 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToPTX)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Assembly);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Assembly);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -199,7 +199,7 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(SerializeROCDLToBinary)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);
@@ -243,7 +243,7 @@ TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) {
   // Serialize the module.
   auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
   ASSERT_TRUE(!!serializer);
-  gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary);
+  gpu::TargetOptions options("", {}, "", "", gpu::CompilationTarget::Binary);
   for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
     std::optional<SmallVector<char, 0>> object =
         serializer.serializeToObject(gpuModule, options);

diff  --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
index 63d1dbd2519bea..314f25833f38ca 100644
--- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
@@ -174,8 +174,8 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithInitialLLVMIR)) {
   };
 
   gpu::TargetOptions opts(
-      {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(), {},
-      initialCallback);
+      {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
+      {}, initialCallback);
   std::optional<SmallVector<char, 0>> serializedBinary =
       targetAttr.serializeToObject(*module, opts);
 
@@ -202,8 +202,8 @@ TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(CallbackInvokedWithLinkedLLVMIR)) {
   };
 
   gpu::TargetOptions opts(
-      {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(), {},
-      {}, linkedCallback);
+      {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
+      {}, {}, linkedCallback);
   std::optional<SmallVector<char, 0>> serializedBinary =
       targetAttr.serializeToObject(*module, opts);
 
@@ -231,8 +231,8 @@ TEST_F(MLIRTargetLLVM,
   };
 
   gpu::TargetOptions opts(
-      {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(), {},
-      {}, {}, optimizedCallback);
+      {}, {}, {}, {}, mlir::gpu::TargetOptions::getDefaultCompilationTarget(),
+      {}, {}, {}, optimizedCallback);
   std::optional<SmallVector<char, 0>> serializedBinary =
       targetAttr.serializeToObject(*module, opts);