[Mlir-commits] [mlir] [mlir][gpu] Add optional attributes of kernelModule and kernelFunc for outlining kernels. (PR #118861)
Zhen Wang
llvmlistbot at llvm.org
Thu Dec 5 21:12:51 PST 2024
https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/118861
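For context, here is a minimal sketch of the usage these patches enable, mirroring the test added in PATCH 7/7. The function name, the symbol names @my_module and @my_kernel, and "some_op" are illustrative only, not part of the patch:
```
func.func @sketch() {
  %c8  = arith.constant 8 : index
  %c32 = arith.constant 32 : index
  // The optional kernelFunc/kernelModule attributes name the outlined
  // gpu.func and the gpu.module it is placed into.
  gpu.launch blocks(%bx, %by, %bz) in (%gx = %c8, %gy = %c8, %gz = %c8)
             threads(%tx, %ty, %tz) in (%tdx = %c32, %tdy = %c32, %tdz = %c32) {
    "some_op"(%bx, %tx) : (index, index) -> ()
    gpu.terminator
  } {kernelModule = @my_module, kernelFunc = @my_kernel}
  // After -gpu-kernel-outlining this launch becomes
  //   gpu.launch_func @my_module::@my_kernel blocks in (...) threads in (...)
  // with the body outlined into gpu.func @my_kernel inside gpu.module @my_module.
  return
}
```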
>From 524fc1393f649657a83ec1c7a3aa02491b779c1f Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Wed, 4 Dec 2024 16:50:51 -0800
Subject: [PATCH 1/7] Add optional attribute outline_module to gpu.launch
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 12 +++++++++++-
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 13 ++++++++++---
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index d08e7ceb9e6c69..1a393cf3daba8c 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -803,7 +803,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Optional<Index>:$clusterSizeX,
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
- Optional<I32>:$dynamicSharedMemorySize)>,
+ Optional<I32>:$dynamicSharedMemorySize,
+ OptionalAttr<SymbolRefAttr>:$outlineModule)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -837,6 +838,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Workgroup memory attributions.
- a variadic number of Private memory attributions.
+ The `outline_module` attribute is optional and specifies the module into which
+ the kernel should be outlined. When this attribute is present, the kernel is
+ outlined into the specified module instead of the default one.
+
Syntax:
```
@@ -1030,6 +1035,11 @@ def GPU_LaunchOp : GPU_Op<"launch", [
static StringRef getNumWorkgroupAttributionsAttrName() {
return "workgroup_attributions";
}
+
+ /// Checks if the outline_module attribute is present.
+ bool hasOutlineModule() {
+ return getOutlineModule().has_value();
+ }
}];
let hasCanonicalizer = 1;
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 5f6556d915f41c..65b63e0f5b71db 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -364,9 +364,16 @@ class GpuKernelOutliningPass
Block::iterator insertPt(func->getNextNode());
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
SetVector<Value> operands;
- std::string kernelFnName =
- Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
- .str();
+ std::string kernelFnName;
+ if (auto outlineModuleAttr = op->getAttrOfType<SymbolRefAttr>("outline_module")) {
+ kernelFnName = outlineModuleAttr.getRootReference().str();
+ llvm::errs() << "outlined module name = " << kernelFnName << "\n";
+ } else {
+ kernelFnName =
+ Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
+ .str();
+ llvm::errs() << "original module name = " << kernelFnName << "\n";
+ }
gpu::GPUFuncOp outlinedFunc =
outlineKernelFuncImpl(op, kernelFnName, operands);
>From 303c7f95669b5d29d9a208626c2baaadb3638e94 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 10:37:00 -0800
Subject: [PATCH 2/7] Add optional attributes kernelFunc and kernelModule to
specify the kernel function name or kernel module name.
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 19 ++++++----
.../GPU/Transforms/KernelOutlining.cpp | 36 ++++++++++++++-----
2 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 1a393cf3daba8c..94d3872a45e2f2 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -804,7 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
- OptionalAttr<SymbolRefAttr>:$outlineModule)>,
+ OptionalAttr<SymbolRefAttr>:$kernelFunc,
+ OptionalAttr<SymbolRefAttr>:$kernelModule)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -838,9 +839,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Workgroup memory attributions.
- a variadic number of Private memory attributions.
- The `outline_module` attribute is optional and specifies the module into which
- the kernel should be outlined. When this attribute is present, the kernel is
- outlined into the specified module instead of the default one.
+ The `kernelFunc` and `kernelModule` attributes are optional and specify the kernel name and the module in which the kernel should be outlined.
+
Syntax:
@@ -1036,9 +1036,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [
return "workgroup_attributions";
}
- /// Checks if the outline_module attribute is present.
- bool hasOutlineModule() {
- return getOutlineModule().has_value();
+ /// Checks if the kernel func name attribute is present.
+ bool hasKernelFuncName() {
+ return getKernelFunc().has_value();
+ }
+
+ /// Checks if the kernel module name attribute is present.
+ bool hasKernelModuleName() {
+ return getKernelModule().has_value();
}
}];
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 65b63e0f5b71db..6028cb58d6842c 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -365,14 +365,14 @@ class GpuKernelOutliningPass
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
SetVector<Value> operands;
std::string kernelFnName;
- if (auto outlineModuleAttr = op->getAttrOfType<SymbolRefAttr>("outline_module")) {
- kernelFnName = outlineModuleAttr.getRootReference().str();
- llvm::errs() << "outlined module name = " << kernelFnName << "\n";
+ if (op.hasKernelFuncName()) {
+ kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc").getRootReference().str();
+ llvm::errs() << "use provided kernel func name = " << kernelFnName << "\n";
} else {
kernelFnName =
Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
.str();
- llvm::errs() << "original module name = " << kernelFnName << "\n";
+ llvm::errs() << "use default kernel func name = " << kernelFnName << "\n";
}
gpu::GPUFuncOp outlinedFunc =
@@ -381,7 +381,7 @@ class GpuKernelOutliningPass
// Create nested module and insert outlinedFunc. The module will
// originally get the same name as the function, but may be renamed on
// insertion into the parent module.
- auto kernelModule = createKernelModule(outlinedFunc, symbolTable);
+ auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
symbolTable.insert(kernelModule, insertPt);
// Potentially changes signature, pulling in constants.
@@ -402,7 +402,7 @@ class GpuKernelOutliningPass
private:
/// Returns a gpu.module containing kernelFunc and all callees (recursive).
- gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
+ gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
const SymbolTable &parentSymbolTable) {
// TODO: This code cannot use an OpBuilder because it must be inserted into
// a SymbolTable by the caller. SymbolTable needs to be refactored to
@@ -410,8 +410,26 @@ class GpuKernelOutliningPass
// and then this needs to use the OpBuilder.
auto *context = getOperation().getContext();
OpBuilder builder(context);
- auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
- kernelFunc.getName());
+ std::string kernelModuleName;
+ if (gpuLaunchOp.hasKernelModuleName()) {
+ kernelModuleName = gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule").getRootReference().str();
+ llvm::errs() << "use provided kernel module name = " << kernelModuleName << "\n";
+ } else {
+ kernelModuleName = kernelFunc.getName();
+ llvm::errs() << "use default kernel module name = " << kernelModuleName << "\n";
+ }
+
+ gpu::GPUModuleOp kernelModule;
+ // Check if the module already exists in the symbol table
+ if (auto existingModule = parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
+ llvm::errs() << "Reusing existing kernel module: " << kernelModuleName << "\n";
+ kernelModule = existingModule;
+ } else {
+ // If not found, create a new GPU module
+ llvm::errs() << "Creating new kernel module: " << kernelModuleName << "\n";
+ kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
+ kernelModuleName);
+ }
// If a valid data layout spec was provided, attach it to the kernel module.
// Otherwise, the default data layout will be used.
@@ -439,6 +457,8 @@ class GpuKernelOutliningPass
}
}
+ //llvm::errs() << "kernelModule:\n" << kernelModule << "\n";
+
return kernelModule;
}
>From c78e8360c93747c4a8639fcf5a8d37219d96ea9c Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 11:27:57 -0800
Subject: [PATCH 3/7] formatting
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 3 +-
.../GPU/Transforms/KernelOutlining.cpp | 28 +++++++++----------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 94d3872a45e2f2..71d14f5f7774b9 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -839,7 +839,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Workgroup memory attributions.
- a variadic number of Private memory attributions.
- The `kernelFunc` and `kernelModule` attributes are optional and specify the kernel name and the module in which the kernel should be outlined.
+ The `kernelFunc` and `kernelModule` attributes are optional and specify
+ the kernel name and the module in which the kernel should be outlined.
Syntax:
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 6028cb58d6842c..872200566bb315 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -366,13 +366,14 @@ class GpuKernelOutliningPass
SetVector<Value> operands;
std::string kernelFnName;
if (op.hasKernelFuncName()) {
- kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc").getRootReference().str();
- llvm::errs() << "use provided kernel func name = " << kernelFnName << "\n";
+ kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc")
+ .getRootReference()
+ .str();
} else {
kernelFnName =
- Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
+ Twine(op->getParentOfType<SymbolOpInterface>().getName(),
+ "_kernel")
.str();
- llvm::errs() << "use default kernel func name = " << kernelFnName << "\n";
}
gpu::GPUFuncOp outlinedFunc =
@@ -402,7 +403,8 @@ class GpuKernelOutliningPass
private:
/// Returns a gpu.module containing kernelFunc and all callees (recursive).
- gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
+ gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
+ gpu::GPUFuncOp kernelFunc,
const SymbolTable &parentSymbolTable) {
// TODO: This code cannot use an OpBuilder because it must be inserted into
// a SymbolTable by the caller. SymbolTable needs to be refactored to
@@ -412,23 +414,23 @@ class GpuKernelOutliningPass
OpBuilder builder(context);
std::string kernelModuleName;
if (gpuLaunchOp.hasKernelModuleName()) {
- kernelModuleName = gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule").getRootReference().str();
- llvm::errs() << "use provided kernel module name = " << kernelModuleName << "\n";
+ kernelModuleName =
+ gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule")
+ .getRootReference()
+ .str();
} else {
kernelModuleName = kernelFunc.getName();
- llvm::errs() << "use default kernel module name = " << kernelModuleName << "\n";
}
gpu::GPUModuleOp kernelModule;
// Check if the module already exists in the symbol table
- if (auto existingModule = parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
- llvm::errs() << "Reusing existing kernel module: " << kernelModuleName << "\n";
+ if (auto existingModule =
+ parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
kernelModule = existingModule;
} else {
// If not found, create a new GPU module
- llvm::errs() << "Creating new kernel module: " << kernelModuleName << "\n";
kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
- kernelModuleName);
+ kernelModuleName);
}
// If a valid data layout spec was provided, attach it to the kernel module.
@@ -457,8 +459,6 @@ class GpuKernelOutliningPass
}
}
- //llvm::errs() << "kernelModule:\n" << kernelModule << "\n";
-
return kernelModule;
}
>From 0d422ef5922852be734ecb86fb30d648fc2f152b Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 14:45:59 -0800
Subject: [PATCH 4/7] Address review feedback
---
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 872200566bb315..da011be1c7eb79 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -365,10 +365,8 @@ class GpuKernelOutliningPass
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
SetVector<Value> operands;
std::string kernelFnName;
- if (op.hasKernelFuncName()) {
- kernelFnName = op->getAttrOfType<mlir::SymbolRefAttr>("kernelFunc")
- .getRootReference()
- .str();
+ if (op.getKernelFunc()) {
+ kernelFnName = op.getKernelFunc()->getRootReference().str();
} else {
kernelFnName =
Twine(op->getParentOfType<SymbolOpInterface>().getName(),
@@ -413,11 +411,9 @@ class GpuKernelOutliningPass
auto *context = getOperation().getContext();
OpBuilder builder(context);
std::string kernelModuleName;
- if (gpuLaunchOp.hasKernelModuleName()) {
+ if (gpuLaunchOp.getKernelModule()) {
kernelModuleName =
- gpuLaunchOp->getAttrOfType<mlir::SymbolRefAttr>("kernelModule")
- .getRootReference()
- .str();
+ gpuLaunchOp.getKernelModule()->getRootReference().str();
} else {
kernelModuleName = kernelFunc.getName();
}
>From d31b13935d3512449ad906e2110ac208a0fa1a3e Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 14:46:25 -0800
Subject: [PATCH 5/7] Address review feedback
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 71d14f5f7774b9..e700e478f2c089 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -842,7 +842,6 @@ def GPU_LaunchOp : GPU_Op<"launch", [
The `kernelFunc` and `kernelModule` attributes are optional and specify
the kernel name and the module in which the kernel should be outlined.
-
Syntax:
```
@@ -1036,16 +1035,6 @@ def GPU_LaunchOp : GPU_Op<"launch", [
static StringRef getNumWorkgroupAttributionsAttrName() {
return "workgroup_attributions";
}
-
- /// Checks if the kernel func name attribute is present.
- bool hasKernelFuncName() {
- return getKernelFunc().has_value();
- }
-
- /// Checks if the kernel module name attribute is present.
- bool hasKernelModuleName() {
- return getKernelModule().has_value();
- }
}];
let hasCanonicalizer = 1;
>From 8b5827f401f0dbde4dd79f3384af01593be59239 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 20:13:32 -0800
Subject: [PATCH 6/7] Move the check for an existing kernel module into the case
 where the kernel module name is specified.
---
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index da011be1c7eb79..ff8e214e5c10fe 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -411,19 +411,20 @@ class GpuKernelOutliningPass
auto *context = getOperation().getContext();
OpBuilder builder(context);
std::string kernelModuleName;
+ gpu::GPUModuleOp kernelModule = nullptr;
if (gpuLaunchOp.getKernelModule()) {
kernelModuleName =
gpuLaunchOp.getKernelModule()->getRootReference().str();
+ if (auto existingModule =
+ parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
+ kernelModule = existingModule;
+ }
} else {
kernelModuleName = kernelFunc.getName();
}
- gpu::GPUModuleOp kernelModule;
// Check if the module already exists in the symbol table
- if (auto existingModule =
- parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName)) {
- kernelModule = existingModule;
- } else {
+ if (!kernelModule) {
// If not found, create a new GPU module
kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
kernelModuleName);
>From 9110630ab645bea0d7a3d5abbe548ec54188bf94 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Thu, 5 Dec 2024 20:59:10 -0800
Subject: [PATCH 7/7] add test
---
mlir/test/Dialect/GPU/outlining.mlir | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 6e682b26f6c95c..566547123ba698 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -508,3 +508,24 @@ func.func @launch_cluster() {
// CHECK-NEXT: "some_op"(%[[CID]], %[[BID]], %[[BDIM]]) : (index, index, index) -> ()
// CHECK-NEXT: = memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref<?xf32, 1>
+// -----
+// This test exercises the two optional attributes kernelModule and kernelFunc for gpu.launch.
+// CHECK-LABEL: func.func @testKernelAttributes()
+// CHECK: gpu.launch_func @test_module::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
+// CHECK: gpu.module @test_module
+// CHECK: gpu.func @test_kernel_func()
+func.func @testKernelAttributes() {
+ %gDimX = arith.constant 8 : index
+ %gDimY = arith.constant 12 : index
+ %gDimZ = arith.constant 16 : index
+ %bDimX = arith.constant 32 : index
+ %bDimY = arith.constant 16 : index
+ %bDimZ = arith.constant 8 : index
+
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ gpu.terminator
+ } {kernelModule = @test_module, kernelFunc = @test_kernel_func}
+ return
+}