[Mlir-commits] [mlir] 516d6ed - [mlir][gpu] Add optional attributes of kernelModule and kernelFunc for outlining kernels. (#118861)
llvmlistbot at llvm.org
Fri Dec 6 12:33:37 PST 2024
Author: Zhen Wang
Date: 2024-12-06T12:33:34-08:00
New Revision: 516d6ede122086027baa2288623605a423375e87
URL: https://github.com/llvm/llvm-project/commit/516d6ede122086027baa2288623605a423375e87
DIFF: https://github.com/llvm/llvm-project/commit/516d6ede122086027baa2288623605a423375e87.diff
LOG: [mlir][gpu] Add optional attributes of kernelModule and kernelFunc for outlining kernels. (#118861)
Add optional attributes so that the names of the generated kernel functions
and kernel modules can be specified.
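For illustration, a minimal sketch of a launch carrying both attributes; the symbol names @my_module and @my_kernel are hypothetical placeholders, and the added tests below use the same form:

```mlir
func.func @main() {
  %c1 = arith.constant 1 : index
  // With the attributes below, gpu-kernel-outlining outlines the body into
  // gpu.func @my_kernel inside gpu.module @my_module (creating the module if
  // it does not already exist) and rewrites the launch to
  // gpu.launch_func @my_module::@my_kernel.
  gpu.launch blocks(%bx, %by, %bz) in (%gx = %c1, %gy = %c1, %gz = %c1)
             threads(%tx, %ty, %tz) in (%sx = %c1, %sy = %c1, %sz = %c1) {
    gpu.terminator
  } {kernelModule = @my_module, kernelFunc = @my_kernel}
  return
}
```

If only one attribute is present, the missing name falls back to the existing defaults (the parent function name with a "_kernel" suffix for the function, and the kernel function name for the module), as the tests below check.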
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
mlir/test/Dialect/GPU/outlining.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index d08e7ceb9e6c69..42a017db300af6 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -803,7 +803,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Optional<Index>:$clusterSizeX,
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
- Optional<I32>:$dynamicSharedMemorySize)>,
+ Optional<I32>:$dynamicSharedMemorySize,
+ OptionalAttr<SymbolRefAttr>:$kernelFunc,
+ OptionalAttr<SymbolRefAttr>:$kernelModule)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -837,6 +839,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Workgroup memory attributions.
- a variadic number of Private memory attributions.
+ The `kernelFunc` and `kernelModule` attributes are optional and specify
+ the kernel name and the module into which the kernel should be outlined.
+
Syntax:
```
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 5f6556d915f41c..ba0c80c50211e3 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -364,9 +364,15 @@ class GpuKernelOutliningPass
Block::iterator insertPt(func->getNextNode());
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
SetVector<Value> operands;
- std::string kernelFnName =
- Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
- .str();
+ std::string kernelFnName;
+ if (op.getKernelFunc()) {
+ kernelFnName = op.getKernelFunc()->getRootReference().str();
+ } else {
+ kernelFnName =
+ Twine(op->getParentOfType<SymbolOpInterface>().getName(),
+ "_kernel")
+ .str();
+ }
gpu::GPUFuncOp outlinedFunc =
outlineKernelFuncImpl(op, kernelFnName, operands);
@@ -374,7 +380,7 @@ class GpuKernelOutliningPass
// Create nested module and insert outlinedFunc. The module will
// originally get the same name as the function, but may be renamed on
// insertion into the parent module.
- auto kernelModule = createKernelModule(outlinedFunc, symbolTable);
+ auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
symbolTable.insert(kernelModule, insertPt);
// Potentially changes signature, pulling in constants.
@@ -395,7 +401,8 @@ class GpuKernelOutliningPass
private:
/// Returns a gpu.module containing kernelFunc and all callees (recursive).
- gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
+ gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
+ gpu::GPUFuncOp kernelFunc,
const SymbolTable &parentSymbolTable) {
// TODO: This code cannot use an OpBuilder because it must be inserted into
// a SymbolTable by the caller. SymbolTable needs to be refactored to
@@ -403,8 +410,23 @@ class GpuKernelOutliningPass
// and then this needs to use the OpBuilder.
auto *context = getOperation().getContext();
OpBuilder builder(context);
- auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
- kernelFunc.getName());
+ std::string kernelModuleName;
+ gpu::GPUModuleOp kernelModule;
+ if (gpuLaunchOp.getKernelModule()) {
+ kernelModuleName =
+ gpuLaunchOp.getKernelModule()->getRootReference().str();
+ kernelModule =
+ parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName);
+ } else {
+ kernelModuleName = kernelFunc.getName();
+ }
+
+ // Check if the module already exists in the symbol table
+ if (!kernelModule) {
+ // If not found, create a new GPU module
+ kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
+ kernelModuleName);
+ }
// If a valid data layout spec was provided, attach it to the kernel module.
// Otherwise, the default data layout will be used.
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 6e682b26f6c95c..d48fa054432d1a 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -508,3 +508,125 @@ func.func @launch_cluster() {
// CHECK-NEXT: "some_op"(%[[CID]], %[[BID]], %[[BDIM]]) : (index, index, index) -> ()
// CHECK-NEXT: = memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref<?xf32, 1>
+// -----
+// Tests the two optional attributes kernelModule and kernelFunc on gpu.launch.
+// CHECK-LABEL: func.func @testKernelAttributes()
+// CHECK: gpu.launch_func @test_module::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
+// CHECK: gpu.module @test_module
+// CHECK: gpu.func @test_kernel_func()
+func.func @testKernelAttributes() {
+ %gDimX = arith.constant 8 : index
+ %gDimY = arith.constant 12 : index
+ %gDimZ = arith.constant 16 : index
+ %bDimX = arith.constant 32 : index
+ %bDimY = arith.constant 16 : index
+ %bDimZ = arith.constant 8 : index
+
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ gpu.terminator
+ } {kernelModule = @test_module, kernelFunc = @test_kernel_func}
+ return
+}
+
+// -----
+// Tests the two optional attributes kernelModule and kernelFunc on gpu.launch when the kernel module already exists.
+
+// CHECK-LABEL: gpu.module @existing_module
+// CHECK: gpu.func @test_kernel_func()
+// CHECK: gpu.func @test_kernel_func_0()
+// CHECK-NOT: gpu.module @testExistingModule_kernel
+// CHECK-NOT: gpu.func @testExistingModule_kernel()
+// CHECK: func.func @testExistingModule()
+// CHECK: gpu.launch_func @existing_module::@test_kernel_func_0 blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
+
+gpu.module @existing_module {
+ gpu.func @test_kernel_func() {
+ gpu.return
+ }
+}
+
+func.func @testExistingModule() {
+ %gDimX = arith.constant 8 : index
+ %gDimY = arith.constant 12 : index
+ %gDimZ = arith.constant 16 : index
+ %bDimX = arith.constant 32 : index
+ %bDimY = arith.constant 16 : index
+ %bDimZ = arith.constant 8 : index
+
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ gpu.terminator
+ } {kernelModule = @existing_module, kernelFunc = @test_kernel_func}
+ return
+}
+
+// -----
+// Tests the optional kernelModule attribute on gpu.launch.
+// CHECK-LABEL: func.func @testKernelModuleOnly()
+// CHECK: gpu.launch_func @test_module::@testKernelModuleOnly_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
+// CHECK: gpu.module @test_module
+// CHECK: gpu.func @testKernelModuleOnly_kernel()
+func.func @testKernelModuleOnly() {
+ %gDimX = arith.constant 8 : index
+ %gDimY = arith.constant 12 : index
+ %gDimZ = arith.constant 16 : index
+ %bDimX = arith.constant 32 : index
+ %bDimY = arith.constant 16 : index
+ %bDimZ = arith.constant 8 : index
+
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ gpu.terminator
+ } {kernelModule = @test_module}
+ return
+}
+
+// -----
+// Tests the optional kernelFunc attribute on gpu.launch.
+// CHECK-LABEL: func.func @testKernelFuncOnly()
+// CHECK: gpu.launch_func @test_kernel_func::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
+
+// CHECK: gpu.module @test_kernel_func
+// CHECK: gpu.func @test_kernel_func()
+func.func @testKernelFuncOnly() {
+ %gDimX = arith.constant 8 : index
+ %gDimY = arith.constant 12 : index
+ %gDimZ = arith.constant 16 : index
+ %bDimX = arith.constant 32 : index
+ %bDimY = arith.constant 16 : index
+ %bDimZ = arith.constant 8 : index
+
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ gpu.terminator
+ } {kernelFunc = @test_kernel_func}
+ return
+}
+
+// -----
+// Tests gpu.launch when neither of the optional attributes kernelModule and kernelFunc is specified.
+// CHECK-LABEL: func.func @testNoAttributes()
+// CHECK: gpu.launch_func @testNoAttributes_kernel::@testNoAttributes_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
+
+// CHECK: gpu.module @testNoAttributes_kernel
+// CHECK: gpu.func @testNoAttributes_kernel()
+func.func @testNoAttributes() {
+ %gDimX = arith.constant 8 : index
+ %gDimY = arith.constant 12 : index
+ %gDimZ = arith.constant 16 : index
+ %bDimX = arith.constant 32 : index
+ %bDimY = arith.constant 16 : index
+ %bDimZ = arith.constant 8 : index
+
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ gpu.terminator
+ }
+ return
+}