[Mlir-commits] [mlir] [mlir][gpu] Update attribute definitions in `gpu::LaunchOp` (PR #152106)
Longsheng Mou
llvmlistbot at llvm.org
Thu Aug 7 08:09:41 PDT 2025
https://github.com/CoTinker updated https://github.com/llvm/llvm-project/pull/152106
From 500996f641f661f69f23df746350d741fa6ad6f2 Mon Sep 17 00:00:00 2001
From: Longsheng Mou <longshengmou at gmail.com>
Date: Tue, 5 Aug 2025 17:10:27 +0800
Subject: [PATCH 1/2] [mlir][gpu] Update attribute definitions in
`gpu::LaunchOp`
This PR makes two updates to `gpu.launch`:
- Change the attribute type for kernel function and module from
`SymbolRefAttr` to `FlatSymbolRefAttr` to avoid nested symbol references.
- Rename the attributes from `kernelFunc` and `kernelModule` to `function`
and `module`.
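For illustration, after this patch the attributes still live in the trailing
attribute dictionary of `gpu.launch`, now as flat symbol references. A minimal
sketch (the symbol names `@my_module` and `@my_kernel` are hypothetical):

```mlir
// The outlining pass uses these optional attributes to name the generated
// kernel function and the gpu.module that holds it.
func.func @example(%sz : index) {
  gpu.launch blocks(%bx, %by, %bz) in (%gx = %sz, %gy = %sz, %gz = %sz)
             threads(%tx, %ty, %tz) in (%bx0 = %sz, %by0 = %sz, %bz0 = %sz) {
    gpu.terminator
  } {module = @my_module, function = @my_kernel}
  return
}
```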
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 6 +++---
.../Dialect/GPU/Transforms/KernelOutlining.cpp | 9 ++++-----
mlir/test/Dialect/GPU/ops.mlir | 12 ++++++++++++
mlir/test/Dialect/GPU/outlining.mlir | 18 +++++++++---------
4 files changed, 28 insertions(+), 17 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 2ed7d3810b918..872ba5a39c736 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -804,8 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
- OptionalAttr<SymbolRefAttr>:$kernelFunc,
- OptionalAttr<SymbolRefAttr>:$kernelModule)>,
+ OptionalAttr<FlatSymbolRefAttr>:$function,
+ OptionalAttr<FlatSymbolRefAttr>:$module)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -839,7 +839,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
- a variadic number of Workgroup memory attributions.
- a variadic number of Private memory attributions.
- The `kernelFunc` and `kernelModule` attributes are optional and specifies
+ The `function` and `module` attributes are optional and specify
the kernel name and a module in which the kernel should be outlined.
Syntax:
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 99f5c5b0cf139..d4978ca768747 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -356,8 +356,8 @@ class GpuKernelOutliningPass
auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
SetVector<Value> operands;
std::string kernelFnName;
- if (op.getKernelFunc()) {
- kernelFnName = op.getKernelFunc()->getRootReference().str();
+ if (op.getFunction()) {
+ kernelFnName = op.getFunction()->str();
} else {
kernelFnName =
Twine(op->getParentOfType<SymbolOpInterface>().getName(),
@@ -403,9 +403,8 @@ class GpuKernelOutliningPass
OpBuilder builder(context);
std::string kernelModuleName;
gpu::GPUModuleOp kernelModule;
- if (gpuLaunchOp.getKernelModule()) {
- kernelModuleName =
- gpuLaunchOp.getKernelModule()->getRootReference().str();
+ if (gpuLaunchOp.getModule()) {
+ kernelModuleName = gpuLaunchOp.getModule()->str();
kernelModule =
parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName);
} else {
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index ee1fdfa4d02f0..2bdad6db3815e 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -17,6 +17,18 @@ module attributes {gpu.container_module} {
return
}
+ // CHECK-LABEL:func @launch_with_module_func_attr(%{{.*}}: index)
+ func.func @launch_with_module_func_attr(%sz : index) {
+ // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
+ threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+ // CHECK: gpu.terminator
+ gpu.terminator
+ // CHECK: {function = @test_kernel_func, module = @existing_module}
+ } {function = @test_kernel_func, module = @existing_module}
+ return
+ }
+
// CHECK-LABEL:func @args(%{{.*}}: index, %{{.*}}: index, %{{.*}}: f32, %{{.*}}: memref<?xf32, 1>) {
func.func @args(%blk : index, %thrd : index, %float : f32, %data : memref<?xf32,1>) {
// CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index d48fa054432d1..0c1921fe1b643 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -509,7 +509,7 @@ func.func @launch_cluster() {
// CHECK-NEXT: = memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref<?xf32, 1>
// -----
-// This test tests the two optional attributes kernelModule and kernelFunc for gpu.launch
+// Tests the two optional attributes `module` and `function` on gpu.launch.
// CHECK-LABEL: func.func @testKernelAttributes()
// CHECK: gpu.launch_func @test_module::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
// CHECK: gpu.module @test_module
@@ -526,12 +526,12 @@ func.func @testKernelAttributes() {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {kernelModule = @test_module, kernelFunc = @test_kernel_func}
+ } {module = @test_module, function = @test_kernel_func}
return
}
// -----
-// This test tests the two optional attributes kernelModule and kernelFunc for gpu.launch, when kernelModule already exists.
+// Tests the two optional attributes `module` and `function` on gpu.launch when the referenced module already exists.
// CHECK-LABEL: gpu.module @existing_module
// CHECK: gpu.func @test_kernel_func()
@@ -559,12 +559,12 @@ func.func @testExistingModule() {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {kernelModule = @existing_module, kernelFunc = @test_kernel_func}
+ } {module = @existing_module, function = @test_kernel_func}
return
}
// -----
-// This test tests the optional attribute kernelModule for gpu.launch.
+// Tests the optional attribute `module` on gpu.launch.
// CHECK-LABEL: func.func @testKernelModuleOnly()
// CHECK: gpu.launch_func @test_module::@testKernelModuleOnly_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
// CHECK: gpu.module @test_module
@@ -581,12 +581,12 @@ func.func @testKernelModuleOnly() {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {kernelModule = @test_module}
+ } {module = @test_module}
return
}
// -----
-// This test tests the optional attribute kernelFunc for gpu.launch.
+// Tests the optional attribute `function` on gpu.launch.
// CHECK-LABEL: func.func @testKernelFuncOnly()
// CHECK: gpu.launch_func @test_kernel_func::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
@@ -604,12 +604,12 @@ func.func @testKernelFuncOnly() {
threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {kernelFunc = @test_kernel_func}
+ } {function = @test_kernel_func}
return
}
// -----
-// This test tests gpu.launch when optional attributes kernelModule and kernelFunc are not specified.
+// Tests gpu.launch when the optional attributes `module` and `function` are not specified.
// CHECK-LABEL: func.func @testNoAttributes()
// CHECK: gpu.launch_func @testNoAttributes_kernel::@testNoAttributes_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
From 99c7bdc7713b7a65d9d9a74e0ad5eaab99303fa2 Mon Sep 17 00:00:00 2001
From: Longsheng Mou <longshengmou at gmail.com>
Date: Thu, 7 Aug 2025 22:51:15 +0800
Subject: [PATCH 2/2] update syntax
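This makes the optional symbols part of the custom assembly format instead of
the trailing attribute dictionary. A minimal sketch of the updated form
(symbol names hypothetical):

```mlir
// module(...) and function(...) are printed between the launch bounds and
// the body region, and are elided from the trailing attribute dictionary.
func.func @example(%sz : index) {
  gpu.launch blocks(%bx, %by, %bz) in (%gx = %sz, %gy = %sz, %gz = %sz)
             threads(%tx, %ty, %tz) in (%bx0 = %sz, %by0 = %sz, %bz0 = %sz)
             module(@my_module) function(@my_kernel) {
    gpu.terminator
  }
  return
}
```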
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 18 ++++++--
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 51 +++++++++++++++++++++-
mlir/test/Dialect/GPU/ops.mlir | 8 ++--
mlir/test/Dialect/GPU/outlining.mlir | 20 +++++----
4 files changed, 80 insertions(+), 17 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 872ba5a39c736..f946bb731e2ca 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -804,8 +804,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Optional<Index>:$clusterSizeY,
Optional<Index>:$clusterSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
- OptionalAttr<FlatSymbolRefAttr>:$function,
- OptionalAttr<FlatSymbolRefAttr>:$module)>,
+ OptionalAttr<FlatSymbolRefAttr>:$module,
+ OptionalAttr<FlatSymbolRefAttr>:$function)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -850,6 +850,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
`blocks` `(` ssa-id-list `)` `in` ssa-reassignment
`threads` `(` ssa-id-list `)` `in` ssa-reassignment
(dynamic_shared_memory_size ssa-use)?
+ (`module(` symbol-ref-id `)`)?
+ (`function(` symbol-ref-id `)`)?
memory-attribution
region attr-dict?
ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
@@ -907,6 +909,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [
// sizes are immediately usable inside body region.
"some_op"(%cx, %bx, %tx) : (index, index, index) -> ()
}
+
+ // Launch with module and function attributes.
+ gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %0, %sz_by = %1, %sz_bz = %2)
+ threads(%tx, %ty, %tz) in (%sz_tx = %3, %sz_ty = %4, %sz_tz = %5)
+ module(@kernel_module) function(@kernel_func) {
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ %42 = memref.load %val1[%bx] : memref<?xf32, 1>
+ }
```
Rationale: using operation/block arguments gives analyses a clear way of
@@ -931,7 +941,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [
CArg<"TypeRange", "{}">:$privateAttributions,
CArg<"Value", "nullptr">:$clusterSizeX,
CArg<"Value", "nullptr">:$clusterSizeY,
- CArg<"Value", "nullptr">:$clusterSizeZ)>
+ CArg<"Value", "nullptr">:$clusterSizeZ,
+ CArg<"FlatSymbolRefAttr", "nullptr">:$module,
+ CArg<"FlatSymbolRefAttr", "nullptr">:$function)>
];
let extraClassDeclaration = [{
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 5a72ef17db7f0..9be4b3649d274 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -756,7 +756,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
Type asyncTokenType, ValueRange asyncDependencies,
TypeRange workgroupAttributions,
TypeRange privateAttributions, Value clusterSizeX,
- Value clusterSizeY, Value clusterSizeZ) {
+ Value clusterSizeY, Value clusterSizeZ,
+ FlatSymbolRefAttr module, FlatSymbolRefAttr function) {
OpBuilder::InsertionGuard g(builder);
// Add a WorkGroup attribution attribute. This attribute is required to
@@ -781,6 +782,12 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
+ // Add optional module and function attributes.
+ if (module)
+ result.addAttribute(getModuleAttrName(result.name), module);
+ if (function)
+ result.addAttribute(getFunctionAttrName(result.name), function);
+
// Create a kernel body region with kNumConfigRegionAttributes + N memory
// attributions, where the first kNumConfigRegionAttributes arguments have
// `index` type and the rest have the same types as the data operands.
@@ -944,6 +951,21 @@ void LaunchOp::print(OpAsmPrinter &p) {
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
<< getDynamicSharedMemorySize();
+ // Print optional module attribute.
+ StringRef moduleAttrName = getModuleAttrName();
+ if (auto module = getModule()) {
+ p << ' ' << moduleAttrName << '(';
+ p.printSymbolName(*module);
+ p << ')';
+ }
+ // Print optional function attribute.
+ StringRef functionAttrName = getFunctionAttrName();
+ if (auto function = getFunction()) {
+ p << ' ' << functionAttrName << '(';
+ p.printSymbolName(*function);
+ p << ')';
+ }
+
printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
@@ -952,7 +974,8 @@ void LaunchOp::print(OpAsmPrinter &p) {
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
LaunchOp::getOperandSegmentSizeAttr(),
- getNumWorkgroupAttributionsAttrName()});
+ getNumWorkgroupAttributionsAttrName(),
+ moduleAttrName, functionAttrName});
}
// Parse the size assignment blocks for blocks and threads. These have the form
@@ -990,6 +1013,9 @@ parseSizeAssignment(OpAsmParser &parser,
/// `clusters` `(` ssa-id-list `)` `in` ssa-reassignment (Optional)
/// `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
/// `threads` `(` ssa-id-list `)` `in` ssa-reassignment
+/// (`dynamic_shared_memory_size` ssa-use)?
+/// (`module(` symbol-ref-id `)`)?
+/// (`function(` symbol-ref-id `)`)?
/// memory-attribution
/// region attr-dict?
/// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
@@ -1060,6 +1086,27 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
return failure();
}
+ // Parse optional module attribute.
+ StringRef moduleAttrName = getModuleAttrName(result.name);
+ if (succeeded(parser.parseOptionalKeyword(moduleAttrName))) {
+ FlatSymbolRefAttr moduleSymbol;
+ if (parser.parseLParen() ||
+ parser.parseAttribute(moduleSymbol, Type(), moduleAttrName,
+ result.attributes) ||
+ parser.parseRParen())
+ return failure();
+ }
+ // Parse optional function attribute.
+ StringRef functionAttrName = getFunctionAttrName(result.name);
+ if (succeeded(parser.parseOptionalKeyword(functionAttrName))) {
+ FlatSymbolRefAttr funcSymbol;
+ if (parser.parseLParen() ||
+ parser.parseAttribute(funcSymbol, Type(), functionAttrName,
+ result.attributes) ||
+ parser.parseRParen())
+ return failure();
+ }
+
// Create the region arguments, it has kNumConfigRegionAttributes arguments
// that correspond to block/thread identifiers and grid/block sizes, all
// having `index` type, a variadic number of WorkGroup Attributions and
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 2bdad6db3815e..9cc0bf8f41d5a 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -19,13 +19,13 @@ module attributes {gpu.container_module} {
// CHECK-LABEL:func @launch_with_module_func_attr(%{{.*}}: index)
func.func @launch_with_module_func_attr(%sz : index) {
- // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
+ // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) module(@test_module) function(@test_kernel_func)
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
- threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+ threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz)
+ module(@test_module) function(@test_kernel_func) {
// CHECK: gpu.terminator
gpu.terminator
- // CHECK: {function = @test_kernel_func, module = @existing_module}
- } {function = @test_kernel_func, module = @existing_module}
+ }
return
}
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 0c1921fe1b643..04901182a80f5 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -523,10 +523,11 @@ func.func @testKernelAttributes() {
%bDimZ = arith.constant 8 : index
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
- threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
+ module(@test_module) function(@test_kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {module = @test_module, function = @test_kernel_func}
+ }
return
}
@@ -556,10 +557,11 @@ func.func @testExistingModule() {
%bDimZ = arith.constant 8 : index
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
- threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
+ module(@existing_module) function(@test_kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {module = @existing_module, function = @test_kernel_func}
+ }
return
}
@@ -578,10 +580,11 @@ func.func @testKernelModuleOnly() {
%bDimZ = arith.constant 8 : index
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
- threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
+ module(@test_module) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {module = @test_module}
+ }
return
}
@@ -601,10 +604,11 @@ func.func @testKernelFuncOnly() {
%bDimZ = arith.constant 8 : index
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
- threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ) {
+ threads(%tx, %ty, %tz) in (%block_x = %bDimX, %block_y = %bDimY, %block_z = %bDimZ)
+ function(@test_kernel_func) {
"some_op"(%bx, %tx) : (index, index) -> ()
gpu.terminator
- } {function = @test_kernel_func}
+ }
return
}