[Mlir-commits] [mlir] fbbb8ad - [mlir][gpu] Add passes to attach (NVVM|ROCDL) target attributes to GPU Modules

Fri Aug 11 17:45:38 PDT 2023

Author: Fabian Mora
Date: 2023-08-12T00:45:26Z
New Revision: fbbb8adef15cc0f598facc7c6ecffaa247f60b48

URL: https://github.com/llvm/llvm-project/commit/fbbb8adef15cc0f598facc7c6ecffaa247f60b48
DIFF: https://github.com/llvm/llvm-project/commit/fbbb8adef15cc0f598facc7c6ecffaa247f60b48.diff

LOG: [mlir][gpu] Add passes to attach (NVVM|ROCDL) target attributes to GPU Modules

Adds the passes `nvvm-attach-target` & `rocdl-attach-target` for attaching `nvvm.target` & `rocdl.target` attributes to GPU Modules.

These passes search for GPU Modules in the immediate regions of the Op being acted on, attaching the target attribute to each matching module.
Modules can be selected using a regex string, allowing fine-grained attachment of targets; see the test `attach-targets.mlir` for an example.

Depends on D154153

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D157351

Added: 
    mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
    mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp
    mlir/test/Dialect/LLVMIR/attach-targets.mlir

Modified: 
    mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
    mlir/lib/Dialect/GPU/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
index b3989009292bff..fc20bd2ed921ae 100644

--- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.td
@@ -82,4 +82,109 @@ def GpuModuleToBinaryPass
   ];
 }
 
+def GpuNVVMAttachTarget: Pass<"nvvm-attach-target", ""> { // Declares the pass implemented in NVVMAttachTarget.cpp.
+  let summary = "Attaches an NVVM target attribute to a GPU Module.";
+  let description = [{
+    This pass searches for all GPU Modules in the immediate regions and attaches
+    an NVVM target if the module matches the name specified by the `module` argument.
+
+    Example:
+    ```
+    // File: in.mlir:
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 {...}
+    // mlir-opt --nvvm-attach-target="module=nvvm.* chip=sm_90" in.mlir
+    gpu.module @nvvm_module_1 [#nvvm.target<chip = "sm_90">] {...}
+    gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_90">] {...}
+    gpu.module @rocdl_module_1 {...}
+    ```
+  }];
+  let options = [ // Exposed as members of the generated pass base class.
+    Option<"moduleMatcher", "module", "std::string",
+           /*default=*/ [{""}], // Empty selects every GPU module (the regex is skipped).
+           "Regex used to identify the modules to attach the target to.">,
+    Option<"triple", "triple", "std::string",
+           /*default=*/ "\"nvptx64-nvidia-cuda\"",
+           "Target triple.">,
+    Option<"chip", "chip", "std::string",
+           /*default=*/"\"sm_50\"",
+           "Target chip.">,
+    Option<"features", "features", "std::string",
+           /*default=*/"\"+ptx60\"",
+           "Target features.">,
+    Option<"optLevel", "O", "unsigned",
+           /*default=*/"2",
+           "Optimization level.">,
+    Option<"fastFlag", "fast", "bool",
+           /*default=*/"false",
+           "Enable fast math mode.">, // Adds the `fast` unit flag to the target.
+    Option<"ftzFlag", "ftz", "bool",
+           /*default=*/"false",
+           "Enable flush to zero for denormals.">, // Adds the `ftz` unit flag to the target.
+    ListOption<"linkLibs", "l", "std::string",
+           "Extra bitcode libraries paths to link to.">, // Passed to the target as a string array; omitted when empty.
+  ];
+}
+
+// Declares the `rocdl-attach-target` pass, implemented in
+// ROCDLAttachTarget.cpp. The options below become members of the generated
+// pass base class and are combined into a single `#rocdl.target` attribute.
+def GpuROCDLAttachTarget: Pass<"rocdl-attach-target", ""> {
+  let summary = "Attaches a ROCDL target attribute to a GPU Module.";
+  let description = [{
+    This pass searches for all GPU Modules in the immediate regions and attaches
+    a ROCDL target if the module matches the name specified by the `module` argument.
+
+    Example:
+    ```
+    // File: in.mlir:
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 {...}
+    // mlir-opt --rocdl-attach-target="module=rocdl.* chip=gfx90a" in.mlir
+    gpu.module @nvvm_module_1 {...}
+    gpu.module @nvvm_module_2 {...}
+    gpu.module @rocdl_module_1 [#rocdl.target<chip = "gfx90a">] {...}
+    ```
+  }];
+  let options = [
+    // An empty regex (the default) selects every GPU module.
+    Option<"moduleMatcher", "module", "std::string",
+           /*default=*/ [{""}],
+           "Regex used to identify the modules to attach the target to.">,
+    Option<"triple", "triple", "std::string",
+           /*default=*/ "\"amdgcn-amd-amdhsa\"",
+           "Target triple.">,
+    Option<"chip", "chip", "std::string",
+           /*default=*/"\"gfx900\"",
+           "Target chip.">,
+    Option<"features", "features", "std::string",
+           /*default=*/"\"\"",
+           "Target features.">,
+    Option<"abiVersion", "abi", "std::string",
+           /*default=*/"\"400\"",
+           "ABI version.">,
+    Option<"optLevel", "O", "unsigned",
+           /*default=*/"2",
+           "Optimization level.">,
+    // On by default; turning it off emits the `no_wave64` flag.
+    Option<"wave64Flag", "wave64", "bool",
+           /*default=*/"true",
+           "Use Wave64 mode.">,
+    Option<"fastFlag", "fast", "bool",
+           /*default=*/"false",
+           "Enable fast relaxed math opt.">,
+    Option<"dazFlag", "daz", "bool",
+           /*default=*/"false",
+           "Enable denormals are zero opt.">,
+    Option<"finiteOnlyFlag", "finite-only", "bool",
+           /*default=*/"false",
+           "Enable finite only opt.">,
+    Option<"unsafeMathFlag", "unsafe-math", "bool",
+           /*default=*/"false",
+           "Enable unsafe math opt.">,
+    // On by default; turning it off emits the `unsafe_sqrt` flag.
+    Option<"correctSqrtFlag", "correct-sqrt", "bool",
+           /*default=*/"true",
+           "Enable correct rounded sqrt.">,
+    // Passed to the target as a string array; omitted when empty.
+    ListOption<"linkLibs", "l", "std::string",
+           "Extra bitcode libraries paths to link to.">,
+  ];
+}
+
 #endif // MLIR_DIALECT_GPU_PASSES

diff  --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 00b66f9699a3d9..7109f916363a56 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -52,11 +52,13 @@ add_mlir_dialect_library(MLIRGPUTransforms
   Transforms/KernelOutlining.cpp
   Transforms/MemoryPromotion.cpp
   Transforms/ModuleToBinary.cpp
+  Transforms/NVVMAttachTarget.cpp
   Transforms/ParallelLoopMapper.cpp
   Transforms/SerializeToBlob.cpp
   Transforms/SerializeToCubin.cpp
   Transforms/SerializeToHsaco.cpp
   Transforms/ShuffleRewriter.cpp
+  Transforms/ROCDLAttachTarget.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU

diff  --git a/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
new file mode 100644
index 00000000000000..1167002d9282ff
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/NVVMAttachTarget.cpp
@@ -0,0 +1,86 @@
+//===- NVVMAttachTarget.cpp - Attach an NVVM target -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the `GpuNVVMAttachTarget` pass, attaching `#nvvm.target`
+// attributes to GPU modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVM/NVVM/Target.h"
+#include "llvm/Support/Regex.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_GPUNVVMATTACHTARGET
+#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::NVVM;
+
+namespace {
+/// Pass that appends an `#nvvm.target` attribute, built from the pass options,
+/// to the GPU modules nested directly under the operation it runs on.
+struct NVVMAttachTarget : impl::GpuNVVMAttachTargetBase<NVVMAttachTarget> {
+  using Base::Base;
+
+  /// Registers the NVVM target with `registry`.
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registerNVVMTarget(registry);
+  }
+
+  /// Builds the target attribute and attaches it to the selected modules.
+  void runOnOperation() override;
+
+  /// Returns a dictionary with one unit attribute per enabled flag, or a null
+  /// attribute when no flag is enabled.
+  DictionaryAttr getFlags(OpBuilder &builder) const;
+};
+} // namespace
+
+/// Translates the boolean pass options into the flag dictionary of the target.
+/// Every enabled option contributes a unit attribute keyed by the flag name; a
+/// null attribute is returned when nothing is enabled.
+DictionaryAttr NVVMAttachTarget::getFlags(OpBuilder &builder) const {
+  SmallVector<NamedAttribute, 2> enabled;
+  const UnitAttr marker = builder.getUnitAttr();
+  if (fastFlag)
+    enabled.push_back(builder.getNamedAttr("fast", marker));
+  if (ftzFlag)
+    enabled.push_back(builder.getNamedAttr("ftz", marker));
+  if (enabled.empty())
+    return nullptr;
+  return builder.getDictionaryAttr(enabled);
+}
+
+/// Attaches the `#nvvm.target` described by the pass options to every
+/// `gpu.module` found directly in the regions of the current operation.
+///
+/// A module is selected when the `module` option is empty or its regex matches
+/// the module name. The new target goes after the targets the module already
+/// has, and the resulting list holds each target exactly once.
+void NVVMAttachTarget::runOnOperation() {
+  // An invalid regex never matches, which would silently turn the pass into a
+  // no-op, so report it instead. The empty string means "no filter" and is
+  // never used as a pattern.
+  llvm::Regex matcher(moduleMatcher);
+  std::string regexError;
+  if (!moduleMatcher.empty() && !matcher.isValid(regexError)) {
+    getOperation()->emitError()
+        << "invalid regex in the `module` option: " << regexError;
+    return signalPassFailure();
+  }
+
+  OpBuilder builder(&getContext());
+  ArrayRef<std::string> libs(linkLibs);
+  SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
+  auto target = builder.getAttr<NVVMTargetAttr>(
+      optLevel, triple, chip, features, getFlags(builder),
+      filesToLink.empty() ? nullptr : builder.getStrArrayAttr(filesToLink));
+  for (Region &region : getOperation()->getRegions())
+    for (Block &block : region.getBlocks())
+      for (auto module : block.getOps<gpu::GPUModuleOp>()) {
+        // Check if the name of the module matches.
+        if (!moduleMatcher.empty() && !matcher.match(module.getName()))
+          continue;
+        // Rebuild the target list in its original order, keeping only the
+        // first occurrence of each target. A plain `std::unique` would miss
+        // an identical target that is not the last element of the list.
+        SmallVector<Attribute> targets;
+        auto appendIfNew = [&targets](Attribute attr) {
+          if (std::find(targets.begin(), targets.end(), attr) == targets.end())
+            targets.push_back(attr);
+        };
+        if (std::optional<ArrayAttr> attrs = module.getTargets())
+          for (Attribute attr : attrs->getValue())
+            appendIfNew(attr);
+        appendIfNew(target);
+        // Update the target attribute array.
+        module.setTargetsAttr(builder.getArrayAttr(targets));
+      }
+}

diff  --git a/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp b/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp
new file mode 100644
index 00000000000000..934ee666bef5e7
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/ROCDLAttachTarget.cpp
@@ -0,0 +1,94 @@
+//===- ROCDLAttachTarget.cpp - Attach an ROCDL target ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the `GpuROCDLAttachTarget` pass, attaching
+// `#rocdl.target` attributes to GPU modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVM/ROCDL/Target.h"
+#include "llvm/Support/Regex.h"
+
+namespace mlir {
+#define GEN_PASS_DEF_GPUROCDLATTACHTARGET
+#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
+} // namespace mlir
+
+using namespace mlir;
+using namespace mlir::ROCDL;
+
+namespace {
+/// Pass that appends a `#rocdl.target` attribute, built from the pass options,
+/// to the GPU modules nested directly under the operation it runs on.
+struct ROCDLAttachTarget : impl::GpuROCDLAttachTargetBase<ROCDLAttachTarget> {
+  using Base::Base;
+
+  /// Registers the ROCDL target with `registry`.
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registerROCDLTarget(registry);
+  }
+
+  /// Builds the target attribute and attaches it to the selected modules.
+  void runOnOperation() override;
+
+  /// Returns a dictionary with one unit attribute per emitted flag, or a null
+  /// attribute when no flag is emitted.
+  DictionaryAttr getFlags(OpBuilder &builder) const;
+};
+} // namespace
+
+/// Translates the boolean pass options into the flag dictionary of the target.
+/// Every emitted flag is a unit attribute keyed by the flag name; a null
+/// attribute is returned when no flag is emitted.
+DictionaryAttr ROCDLAttachTarget::getFlags(OpBuilder &builder) const {
+  SmallVector<NamedAttribute, 6> enabled;
+  const UnitAttr marker = builder.getUnitAttr();
+  // `wave64` is recorded through its negation.
+  if (!wave64Flag)
+    enabled.push_back(builder.getNamedAttr("no_wave64", marker));
+  if (fastFlag)
+    enabled.push_back(builder.getNamedAttr("fast", marker));
+  if (dazFlag)
+    enabled.push_back(builder.getNamedAttr("daz", marker));
+  if (finiteOnlyFlag)
+    enabled.push_back(builder.getNamedAttr("finite_only", marker));
+  if (unsafeMathFlag)
+    enabled.push_back(builder.getNamedAttr("unsafe_math", marker));
+  // `correct-sqrt` is recorded through its negation as well.
+  if (!correctSqrtFlag)
+    enabled.push_back(builder.getNamedAttr("unsafe_sqrt", marker));
+  if (enabled.empty())
+    return nullptr;
+  return builder.getDictionaryAttr(enabled);
+}
+
+/// Attaches the `#rocdl.target` described by the pass options to every
+/// `gpu.module` found directly in the regions of the current operation.
+///
+/// A module is selected when the `module` option is empty or its regex matches
+/// the module name. The new target goes after the targets the module already
+/// has, and the resulting list holds each target exactly once.
+void ROCDLAttachTarget::runOnOperation() {
+  // An invalid regex never matches, which would silently turn the pass into a
+  // no-op, so report it instead. The empty string means "no filter" and is
+  // never used as a pattern.
+  llvm::Regex matcher(moduleMatcher);
+  std::string regexError;
+  if (!moduleMatcher.empty() && !matcher.isValid(regexError)) {
+    getOperation()->emitError()
+        << "invalid regex in the `module` option: " << regexError;
+    return signalPassFailure();
+  }
+
+  OpBuilder builder(&getContext());
+  ArrayRef<std::string> libs(linkLibs);
+  SmallVector<StringRef> filesToLink(libs.begin(), libs.end());
+  auto target = builder.getAttr<ROCDLTargetAttr>(
+      optLevel, triple, chip, features, abiVersion, getFlags(builder),
+      filesToLink.empty() ? nullptr : builder.getStrArrayAttr(filesToLink));
+  for (Region &region : getOperation()->getRegions())
+    for (Block &block : region.getBlocks())
+      for (auto module : block.getOps<gpu::GPUModuleOp>()) {
+        // Check if the name of the module matches.
+        if (!moduleMatcher.empty() && !matcher.match(module.getName()))
+          continue;
+        // Rebuild the target list in its original order, keeping only the
+        // first occurrence of each target. A plain `std::unique` would miss
+        // an identical target that is not the last element of the list.
+        SmallVector<Attribute> targets;
+        auto appendIfNew = [&targets](Attribute attr) {
+          if (std::find(targets.begin(), targets.end(), attr) == targets.end())
+            targets.push_back(attr);
+        };
+        if (std::optional<ArrayAttr> attrs = module.getTargets())
+          for (Attribute attr : attrs->getValue())
+            appendIfNew(attr);
+        appendIfNew(target);
+        // Update the target attribute array.
+        module.setTargetsAttr(builder.getArrayAttr(targets));
+      }
+}

diff  --git a/mlir/test/Dialect/LLVMIR/attach-targets.mlir b/mlir/test/Dialect/LLVMIR/attach-targets.mlir
new file mode 100644
index 00000000000000..83733db400798e
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/attach-targets.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s --nvvm-attach-target='module=nvvm.* O=3 chip=sm_90' --rocdl-attach-target='module=rocdl.* O=3 chip=gfx90a' | FileCheck %s
+// RUN: mlir-opt %s --nvvm-attach-target='module=options.* O=1 chip=sm_70 fast=true ftz=true' --rocdl-attach-target='module=options.* l=file1.bc,file2.bc wave64=false finite-only=true' | FileCheck %s --check-prefix=CHECK_OPTS
+
+module attributes {gpu.container_module} {
+// Verify the NVVM target is attached to a module that has no targets yet.
+// CHECK: @nvvm_module_1 [#nvvm.target<O = 3, chip = "sm_90">] {
+gpu.module @nvvm_module_1 {
+}
+// Verify the NVVM target is appended after the pre-existing `sm_60` target.
+// CHECK: @nvvm_module_2 [#nvvm.target<chip = "sm_60">, #nvvm.target<O = 3, chip = "sm_90">] {
+gpu.module @nvvm_module_2 [#nvvm.target<chip = "sm_60">] {
+}
+// Verify the target is not added a second time when the module already carries an identical one.
+// CHECK: @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
+gpu.module @nvvm_module_3 [#nvvm.target<O = 3, chip = "sm_90">] {
+}
+// Verify the NVVM target is not added because the name fails to match the `nvvm.*` regex, while the ROCDL target is attached.
+// CHECK: @rocdl_module [#rocdl.target<O = 3, chip = "gfx90a">] {
+gpu.module @rocdl_module {
+}
+// Verify the non-default options of the second mlir-opt invocation show up in both attached targets.
+// CHECK_OPTS: @options_module_1 [#nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>]  {
+gpu.module @options_module_1 {
+}
+// Verify the options are attached and that the pre-existing target stays first in the list.
+// CHECK_OPTS: @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">, #nvvm.target<O = 1, chip = "sm_70", flags = {fast, ftz}>, #rocdl.target<flags = {finite_only, no_wave64}, link = ["file1.bc", "file2.bc"]>]  {
+gpu.module @options_module_2 [#nvvm.target<O = 3, chip = "sm_90">] {
+}
+}