[Mlir-commits] [mlir] Add optional attributes of kernelModule and kernelFunc for outlining kernels. (PR #118861)

Thu Dec 5 11:31:53 PST 2024

https://github.com/wangzpgi created https://github.com/llvm/llvm-project/pull/118861

Add optional attributes to `gpu.launch` so that the names of the generated kernel function and kernel module can be specified.

>From 3128e6eb773e63fd0c9ca7bb10e30df37a939c3f Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Wed, 4 Dec 2024 16:50:51 -0800
Subject: [PATCH 1/3] Add optional attribute outline_module to gpu.launch

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td          | 12 +++++++++++-
 mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 13 ++++++++++---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index d08e7ceb9e6c69..1a393cf3daba8c 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -803,7 +803,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
                Optional<Index>:$clusterSizeX,
                Optional<Index>:$clusterSizeY,
                Optional<Index>:$clusterSizeZ,
-               Optional<I32>:$dynamicSharedMemorySize)>,
+               Optional<I32>:$dynamicSharedMemorySize,
+               OptionalAttr<SymbolRefAttr>:$outlineModule)>,
     Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "GPU kernel launch operation";
 
@@ -837,6 +838,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Workgroup memory attributions.
     -   a variadic number of Private memory attributions.
 
+    The `outline_module` attribute is optional and specifies a module in which 
+    the kernel should be outlined. When this attribute is present, the kernel is
+    outlined into the specified module instead of the default behavior.
+
     Syntax:
 
     ```
@@ -1030,6 +1035,11 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     static StringRef getNumWorkgroupAttributionsAttrName() {
       return "workgroup_attributions";
     }
+
+    /// Checks if the outline_module attribute is present.
+    bool hasOutlineModule() {
+      return getOutlineModule().has_value();
+    }
   }];
 
   let hasCanonicalizer = 1;
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 5f6556d915f41c..65b63e0f5b71db 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -364,9 +364,16 @@ class GpuKernelOutliningPass
       Block::iterator insertPt(func->getNextNode());
       auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
         SetVector<Value> operands;
-        std::string kernelFnName =
-            Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
-                .str();
+        std::string kernelFnName;
+        if (auto outlineModuleAttr = op->getAttrOfType<SymbolRefAttr>("outline_module")) {
+          kernelFnName = outlineModuleAttr.getRootReference().str();
+          llvm::errs() << "outlined module name = " << kernelFnName << "\n";
+        } else {
+          kernelFnName =
+              Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
+                  .str();
+          llvm::errs() << "original module name = " << kernelFnName << "\n";
+        }
 
         gpu::GPUFuncOp outlinedFunc =
             outlineKernelFuncImpl(op, kernelFnName, operands);

>From 52ad728bef876011b46991074d69a7c70b708255 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 10:37:00 -0800
Subject: [PATCH 2/3] Add optional attributes kernelFunc and kernelModule to
 specify the kernel function name or kernel module name.

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td    | 19 ++++++----
 .../GPU/Transforms/KernelOutlining.cpp        | 36 ++++++++++++++-----
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 1a393cf3daba8c..94d3872a45e2f2 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -804,7 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
                Optional<Index>:$clusterSizeY,
                Optional<Index>:$clusterSizeZ,
                Optional<I32>:$dynamicSharedMemorySize,
-               OptionalAttr<SymbolRefAttr>:$outlineModule)>,
+               OptionalAttr<SymbolRefAttr>:$kernelFunc,
+               OptionalAttr<SymbolRefAttr>:$kernelModule)>,
     Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
   let summary = "GPU kernel launch operation";
 
@@ -838,9 +839,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Workgroup memory attributions.
     -   a variadic number of Private memory attributions.
 
-    The `outline_module` attribute is optional and specifies a module in which 
-    the kernel should be outlined. When this attribute is present, the kernel is
-    outlined into the specified module instead of the default behavior.
+    The `kernelFunc` and `kernelModule` attributes are optional and specifies the kernel name and a module in whichthe kernel should be outlined. 
+
 
     Syntax:
 
@@ -1036,9 +1036,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [
       return "workgroup_attributions";
     }
 
-    /// Checks if the outline_module attribute is present.
-    bool hasOutlineModule() {
-      return getOutlineModule().has_value();
+    /// Checks if the kernel func name attribute is present.
+    bool hasKernelFuncName() {
+      return getKernelFunc().has_value();
+    }
+
+    /// Checks if the kernel module name attribute is present.
+    bool hasKernelModuleName() {
+      return getKernelModule().has_value();
     }
   }];
 
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 65b63e0f5b71db..6028cb58d6842c 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -365,14 +365,14 @@ class GpuKernelOutliningPass
       auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
         SetVector<Value> operands;
         std::string kernelFnName;
-        if (auto outlineModuleAttr = op->getAttrOfType<SymbolRefAttr>("outline_module")) {
-          kernelFnName = outlineModuleAttr.getRootReference().str();
-          llvm::errs() << "outlined module name = " << kernelFnName << "\n";
+        if (op.hasKernelFuncName()) {
+          kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc").getRootReference().str();
+          llvm::errs() << "use provided kernel func name = " << kernelFnName << "\n";
         } else {
           kernelFnName =
               Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
                   .str();
-          llvm::errs() << "original module name = " << kernelFnName << "\n";
+          llvm::errs() << "use default kernel func name = " << kernelFnName << "\n";
         }
 
         gpu::GPUFuncOp outlinedFunc =
@@ -381,7 +381,7 @@ class GpuKernelOutliningPass
         // Create nested module and insert outlinedFunc. The module will
         // originally get the same name as the function, but may be renamed on
         // insertion into the parent module.
-        auto kernelModule = createKernelModule(outlinedFunc, symbolTable);
+        auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
         symbolTable.insert(kernelModule, insertPt);
 
         // Potentially changes signature, pulling in constants.
@@ -402,7 +402,7 @@ class GpuKernelOutliningPass
 
 private:
   /// Returns a gpu.module containing kernelFunc and all callees (recursive).
-  gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
+  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
                                       const SymbolTable &parentSymbolTable) {
     // TODO: This code cannot use an OpBuilder because it must be inserted into
     // a SymbolTable by the caller. SymbolTable needs to be refactored to
@@ -410,8 +410,26 @@ class GpuKernelOutliningPass
     // and then this needs to use the OpBuilder.
     auto *context = getOperation().getContext();
     OpBuilder builder(context);
-    auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
-                                                         kernelFunc.getName());
+    std::string kernelModuleName;
+    if (gpuLaunchOp.hasKernelModuleName()) {
+      kernelModuleName = gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule").getRootReference().str();
+      llvm::errs() << "use provided kernel module name = " << kernelModuleName << "\n";
+    } else {
+      kernelModuleName = kernelFunc.getName();
+      llvm::errs() << "use default kernel module name = " << kernelModuleName << "\n";
+    }
+
+    gpu::GPUModuleOp kernelModule;
+    // Check if the module already exists in the symbol table
+    if (auto existingModule = parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
+      llvm::errs() << "Reusing existing kernel module: " << kernelModuleName << "\n";
+      kernelModule = existingModule;
+    } else {
+      // If not found, create a new GPU module
+      llvm::errs() << "Creating new kernel module: " << kernelModuleName << "\n";
+      kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
+                                                           kernelModuleName);
+    }
 
     // If a valid data layout spec was provided, attach it to the kernel module.
     // Otherwise, the default data layout will be used.
@@ -439,6 +457,8 @@ class GpuKernelOutliningPass
       }
     }
 
+    //llvm::errs() << "kernelModule:\n" << kernelModule << "\n";
+
     return kernelModule;
   }
 

>From f20aa88382612d0ff0c801b066f6970b37e63ddf Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 11:27:57 -0800
Subject: [PATCH 3/3] formatting

---
 mlir/include/mlir/Dialect/GPU/IR/GPUOps.td    |  3 +-
 .../GPU/Transforms/KernelOutlining.cpp        | 28 +++++++++----------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 94d3872a45e2f2..71d14f5f7774b9 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -839,7 +839,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     -   a variadic number of Workgroup memory attributions.
     -   a variadic number of Private memory attributions.
 
-    The `kernelFunc` and `kernelModule` attributes are optional and specifies the kernel name and a module in whichthe kernel should be outlined. 
+    The `kernelFunc` and `kernelModule` attributes are optional and specify
+    the kernel name and a module in which the kernel should be outlined.
 
 
     Syntax:
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 6028cb58d6842c..872200566bb315 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -366,13 +366,14 @@ class GpuKernelOutliningPass
         SetVector<Value> operands;
         std::string kernelFnName;
         if (op.hasKernelFuncName()) {
-          kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc").getRootReference().str();
-          llvm::errs() << "use provided kernel func name = " << kernelFnName << "\n";
+          kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc")
+                             .getRootReference()
+                             .str();
         } else {
           kernelFnName =
-              Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
+              Twine(op->getParentOfType<SymbolOpInterface>().getName(),
+                    "_kernel")
                   .str();
-          llvm::errs() << "use default kernel func name = " << kernelFnName << "\n";
         }
 
         gpu::GPUFuncOp outlinedFunc =
@@ -402,7 +403,8 @@ class GpuKernelOutliningPass
 
 private:
   /// Returns a gpu.module containing kernelFunc and all callees (recursive).
-  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
+  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
+                                      gpu::GPUFuncOp kernelFunc,
                                       const SymbolTable &parentSymbolTable) {
     // TODO: This code cannot use an OpBuilder because it must be inserted into
     // a SymbolTable by the caller. SymbolTable needs to be refactored to
@@ -412,23 +414,23 @@ class GpuKernelOutliningPass
     OpBuilder builder(context);
     std::string kernelModuleName;
     if (gpuLaunchOp.hasKernelModuleName()) {
-      kernelModuleName = gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule").getRootReference().str();
-      llvm::errs() << "use provided kernel module name = " << kernelModuleName << "\n";
+      kernelModuleName =
+          gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule")
+              .getRootReference()
+              .str();
     } else {
       kernelModuleName = kernelFunc.getName();
-      llvm::errs() << "use default kernel module name = " << kernelModuleName << "\n";
     }
 
     gpu::GPUModuleOp kernelModule;
     // Check if the module already exists in the symbol table
-    if (auto existingModule = parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
-      llvm::errs() << "Reusing existing kernel module: " << kernelModuleName << "\n";
+    if (auto existingModule =
+            parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
       kernelModule = existingModule;
     } else {
       // If not found, create a new GPU module
-      llvm::errs() << "Creating new kernel module: " << kernelModuleName << "\n";
       kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
-                                                           kernelModuleName);
+                                                      kernelModuleName);
     }
 
     // If a valid data layout spec was provided, attach it to the kernel module.
@@ -457,8 +459,6 @@ class GpuKernelOutliningPass
       }
     }
 
-    //llvm::errs() << "kernelModule:\n" << kernelModule << "\n";
-
     return kernelModule;
   }