[Mlir-commits] [mlir] [mlir][gpu] Allow integer attribute as `dynamic_shared_memory_size` (PR #71509)
Guray Ozen
llvmlistbot at llvm.org
Wed Nov 15 07:30:09 PST 2023
https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/71509
>From 275969718d63de31e403ee29714971c6ea671357 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 11:32:08 +0100
Subject: [PATCH 1/7] [mlir][gpu] Allow integer attribute as
`dynamic_shared_memory_size` parameter of `gpu.launch`
This PR allows an integer attribute to be used as the `dynamic_shared_memory_size` parameter of `gpu.launch`. See the example IR below: `200` no longer has to be an SSA value.
```
gpu.launch blocks(..) threads(...)
dynamic_shared_memory_size 200
```
---
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h | 1 +
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 24 +++++++++++--
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 34 ++++++++++++++-----
.../GPU/Transforms/KernelOutlining.cpp | 2 +-
mlir/test/Dialect/GPU/outlining.mlir | 33 ++++++++++++++++++
5 files changed, 82 insertions(+), 12 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 14a1fac5fd255f3..06b1ea95d20339d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -17,6 +17,7 @@
#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6375d35f4311295..5bf5cbc5efe628f 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -587,7 +587,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
- Optional<I32>:$dynamicSharedMemorySize)>,
+ Optional<I32>:$dynamicSharedMemorySize,
+ OptionalAttr<SI32Attr>:$dynamicSharedMemorySizeConstant)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -693,7 +694,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
CArg<"Type", "nullptr">:$asyncTokenType,
CArg<"ValueRange", "{}">:$asyncDependencies,
CArg<"TypeRange", "{}">:$workgroupAttributions,
- CArg<"TypeRange", "{}">:$privateAttributions)>
+ CArg<"TypeRange", "{}">:$privateAttributions,
+ CArg<"IntegerAttr", "IntegerAttr()">:$dynamicSharedMemorySizeConstant)>
];
let extraClassDeclaration = [{
@@ -728,6 +730,24 @@ def GPU_LaunchOp : GPU_Op<"launch", [
/// Returns the keywords used in the custom syntax for this Op.
static StringRef getWorkgroupKeyword() { return "workgroup"; }
static StringRef getPrivateKeyword() { return "private"; }
+ static StringRef getDynamicSharedMemorySizeConstantKeyword() {
+ return "dynamicSharedMemorySizeConstant";
+ }
+
+ static int getDynamicSharedMemorySizeDynamicValue() {
+ return std::numeric_limits<int32_t>::min();
+ }
+ /// Returns a value of the dynamic shared memory size.
+ /// If it is a constant, it builds one
+ mlir::Value getDynamicSharedMemorySizeValue(OpBuilder &b) {
+ int32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
+ if (getDynamicSharedMemorySizeConstant().value_or(kDynamic) == kDynamic)
+ return getDynamicSharedMemorySize();
+ return b.create<mlir::arith::ConstantOp>(
+ getLoc(), b.getIntegerType(32),
+ b.getI32IntegerAttr(
+ getDynamicSharedMemorySizeConstant().value()));
+ }
/// Returns the number of buffers located in the workgroup memory.
unsigned getNumWorkgroupAttributions() {
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 5eb2cadc884e151..269ee7dcaec0e71 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -618,7 +618,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
Value getBlockSizeZ, Value dynamicSharedMemorySize,
Type asyncTokenType, ValueRange asyncDependencies,
TypeRange workgroupAttributions,
- TypeRange privateAttributions) {
+ TypeRange privateAttributions,
+ IntegerAttr dynamicSharedMemorySizeAttr) {
// Add a WorkGroup attribution attribute. This attribute is required to
// identify private attributions in the list of block argguments.
result.addAttribute(getNumWorkgroupAttributionsAttrName(),
@@ -634,7 +635,9 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
getBlockSizeY, getBlockSizeZ});
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
-
+ if (dynamicSharedMemorySizeAttr)
+ result.addAttribute(getDynamicSharedMemorySizeConstantKeyword(),
+ dynamicSharedMemorySizeAttr);
// Create a kernel body region with kNumConfigRegionAttributes + N memory
// attributions, where the first kNumConfigRegionAttributes arguments have
// `index` type and the rest have the same types as the data operands.
@@ -759,6 +762,10 @@ void LaunchOp::print(OpAsmPrinter &p) {
if (getDynamicSharedMemorySize())
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
<< getDynamicSharedMemorySize();
+ else if (getDynamicSharedMemorySizeConstantAttr()) {
+ p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
+ << getDynamicSharedMemorySizeConstantAttr().getSInt();
+ }
printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
@@ -768,7 +775,8 @@ void LaunchOp::print(OpAsmPrinter &p) {
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
LaunchOp::getOperandSegmentSizeAttr(),
- getNumWorkgroupAttributionsAttrName()});
+ getNumWorkgroupAttributionsAttrName(),
+ getDynamicSharedMemorySizeConstantKeyword()});
}
// Parse the size assignment blocks for blocks and threads. These have the form
@@ -854,12 +862,20 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
bool hasDynamicSharedMemorySize = false;
if (!parser.parseOptionalKeyword(
LaunchOp::getDynamicSharedMemorySizeKeyword())) {
- hasDynamicSharedMemorySize = true;
- if (parser.parseOperand(dynamicSharedMemorySize) ||
- parser.resolveOperand(dynamicSharedMemorySize,
- parser.getBuilder().getI32Type(),
- result.operands))
- return failure();
+ IntegerAttr shmemAttr;
+ OptionalParseResult shmemAttrResult = parser.parseOptionalAttribute(
+ shmemAttr, parser.getBuilder().getIntegerType(32, true));
+ if (!shmemAttrResult.has_value()) {
+ hasDynamicSharedMemorySize = true;
+ shmemAttr = parser.getBuilder().getSI32IntegerAttr(
+ getDynamicSharedMemorySizeDynamicValue());
+ if (parser.parseOperand(dynamicSharedMemorySize) ||
+ parser.resolveOperand(dynamicSharedMemorySize,
+ parser.getBuilder().getI32Type(),
+ result.operands))
+ return failure();
+ }
+ result.addAttribute(getDynamicSharedMemorySizeConstantKeyword(), shmemAttr);
}
// Create the region arguments, it has kNumConfigRegionAttributes arguments
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index b1e2f914db4cb9b..3e29fbe8cdfbbc3 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -281,7 +281,7 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(),
- launchOp.getDynamicSharedMemorySize(), operands,
+ launchOp.getDynamicSharedMemorySizeValue(builder), operands,
asyncToken ? asyncToken.getType() : nullptr,
launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 28c121a550100c2..b032a4035230990 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -372,3 +372,36 @@ func.func @launch_memory_attributions_1(%arg0 : memref<*xf32>) {
}
// CHECK-DL-LABEL: gpu.module @launch_memory_attributions_1_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
+
+
+// -----
+
+// CHECK-LABEL: func.func @dynamic_shared_memory(
+// CHECK-SAME: %[[arg0:.+]]: i32
+func.func @dynamic_shared_memory(%shmemSize : i32) {
+ %c1 = arith.constant 1 : index
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ dynamic_shared_memory_size %shmemSize
+ {
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ dynamic_shared_memory_size 200
+ {
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ {
+ gpu.terminator
+ }
+
+
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[arg0]]
+// CHECK: %[[c200:.+]] = arith.constant 200 : i32
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c200]]
+ return
+}
+
>From abe8adc1c1139985abc1b75cb9adbb0697bcc014 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 16:59:26 +0100
Subject: [PATCH 2/7] address @nicolasvasilache comments
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 13 +++++--------
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 6 +++---
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 11 ++++++++++-
mlir/test/Dialect/GPU/outlining.mlir | 8 ++++----
4 files changed, 22 insertions(+), 16 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5bf5cbc5efe628f..17f376cccc640b5 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -588,7 +588,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
- OptionalAttr<SI32Attr>:$dynamicSharedMemorySizeConstant)>,
+ OptionalAttr<I32Attr>:$dynamicSharedMemorySizeConstant)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -735,18 +735,15 @@ def GPU_LaunchOp : GPU_Op<"launch", [
}
static int getDynamicSharedMemorySizeDynamicValue() {
- return std::numeric_limits<int32_t>::min();
+ return std::numeric_limits<uint32_t>::max();
}
/// Returns a value of the dynamic shared memory size.
/// If it is a constant, it builds one
- mlir::Value getDynamicSharedMemorySizeValue(OpBuilder &b) {
- int32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
+ mlir::OpFoldResult getDynamicSharedMemorySizeValue(OpBuilder &b) {
+ uint32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
if (getDynamicSharedMemorySizeConstant().value_or(kDynamic) == kDynamic)
return getDynamicSharedMemorySize();
- return b.create<mlir::arith::ConstantOp>(
- getLoc(), b.getIntegerType(32),
- b.getI32IntegerAttr(
- getDynamicSharedMemorySizeConstant().value()));
+ return b.getI32IntegerAttr(getDynamicSharedMemorySizeConstant().value());
}
/// Returns the number of buffers located in the workgroup memory.
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 269ee7dcaec0e71..4cfa64dc6a636f3 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -764,7 +764,7 @@ void LaunchOp::print(OpAsmPrinter &p) {
<< getDynamicSharedMemorySize();
else if (getDynamicSharedMemorySizeConstantAttr()) {
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
- << getDynamicSharedMemorySizeConstantAttr().getSInt();
+ << getDynamicSharedMemorySizeConstantAttr().getInt();
}
printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
@@ -864,10 +864,10 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
LaunchOp::getDynamicSharedMemorySizeKeyword())) {
IntegerAttr shmemAttr;
OptionalParseResult shmemAttrResult = parser.parseOptionalAttribute(
- shmemAttr, parser.getBuilder().getIntegerType(32, true));
+ shmemAttr, parser.getBuilder().getIntegerType(32));
if (!shmemAttrResult.has_value()) {
hasDynamicSharedMemorySize = true;
- shmemAttr = parser.getBuilder().getSI32IntegerAttr(
+ shmemAttr = parser.getBuilder().getI32IntegerAttr(
getDynamicSharedMemorySizeDynamicValue());
if (parser.parseOperand(dynamicSharedMemorySize) ||
parser.resolveOperand(dynamicSharedMemorySize,
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 3e29fbe8cdfbbc3..9f1fcf4a14e8154 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -278,10 +278,19 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
// The launch op has an optional dynamic shared memory size. If it doesn't
// exist, we use zero.
Value asyncToken = launchOp.getAsyncToken();
+ Value dynamicSharedSize;
+ OpFoldResult shmem = launchOp.getDynamicSharedMemorySizeValue(builder);
+ if (auto shmemValue = llvm::dyn_cast<Value>(shmem)) {
+ dynamicSharedSize = shmemValue;
+ } else if (auto shmemConst = getConstantIntValue(shmem)) {
+ dynamicSharedSize = builder.create<mlir::arith::ConstantOp>(
+ launchOp->getLoc(), builder.getIntegerType(32),
+ builder.getI32IntegerAttr(shmemConst.value()));
+ }
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(),
- launchOp.getDynamicSharedMemorySizeValue(builder), operands,
+ dynamicSharedSize, operands,
asyncToken ? asyncToken.getType() : nullptr,
launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index b032a4035230990..68aabd142ee34a6 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -388,7 +388,7 @@ func.func @dynamic_shared_memory(%shmemSize : i32) {
}
gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
- dynamic_shared_memory_size 200
+ dynamic_shared_memory_size 8192
{
gpu.terminator
}
@@ -398,10 +398,10 @@ func.func @dynamic_shared_memory(%shmemSize : i32) {
gpu.terminator
}
-
// CHECK: gpu.launch_func @dynamic_shared_memory_kernel::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[arg0]]
-// CHECK: %[[c200:.+]] = arith.constant 200 : i32
-// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c200]]
+// CHECK: %[[c8192:.+]] = arith.constant 8192 : i32
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c8192]]
+// CHECK: return
return
}
>From 9559fd82f25609681a137591de9ca8bd8597255d Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 17:02:27 +0100
Subject: [PATCH 3/7] format fix
---
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 06b1ea95d20339d..1364bc8d93208ec 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -15,9 +15,9 @@
#define MLIR_DIALECT_GPU_IR_GPUDIALECT_H
#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
-#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
>From 8f806d85e1bfa6f5a96b896b4c1170339dbd6e74 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 17:19:16 +0100
Subject: [PATCH 4/7] add comment
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 17f376cccc640b5..79447c9530922fb 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -733,10 +733,13 @@ def GPU_LaunchOp : GPU_Op<"launch", [
static StringRef getDynamicSharedMemorySizeConstantKeyword() {
return "dynamicSharedMemorySizeConstant";
}
-
- static int getDynamicSharedMemorySizeDynamicValue() {
+
+ /// Returns dynamic value of the dynamic shared memory size. This is used
+ /// if dynamic_shared_memory_size is SSA value
+ static uint32_t getDynamicSharedMemorySizeDynamicValue() {
return std::numeric_limits<uint32_t>::max();
}
+
/// Returns a value of the dynamic shared memory size.
/// If it is a constant, it builds one
mlir::OpFoldResult getDynamicSharedMemorySizeValue(OpBuilder &b) {
>From 3197598ba5b321bf94cd77a33ca8aa86508a9904 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 17:19:45 +0100
Subject: [PATCH 5/7] fix format
---
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 9f1fcf4a14e8154..374c1a45a403234 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -289,8 +289,7 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
}
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
- launchOp.getBlockSizeOperandValues(),
- dynamicSharedSize, operands,
+ launchOp.getBlockSizeOperandValues(), dynamicSharedSize, operands,
asyncToken ? asyncToken.getType() : nullptr,
launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
>From dc3c90384ec9adf8c5df24442df0262495569a9d Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Wed, 8 Nov 2023 09:29:12 +0100
Subject: [PATCH 6/7] add verifier, make kDynamic constexpr
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 12 +++++-------
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 12 ++++++++++--
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 79447c9530922fb..f1a475970bdcaa8 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -727,6 +727,10 @@ def GPU_LaunchOp : GPU_Op<"launch", [
/// placed in the leading positions of the argument list.
static constexpr unsigned kNumConfigRegionAttributes = 12;
+ /// Dynamic value of the dynamic shared memory size. This is used
+ /// when dynamic_shared_memory_size is SSA value
+ static constexpr uint32_t kDynamic = std::numeric_limits<uint32_t>::max();
+
/// Returns the keywords used in the custom syntax for this Op.
static StringRef getWorkgroupKeyword() { return "workgroup"; }
static StringRef getPrivateKeyword() { return "private"; }
@@ -734,16 +738,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [
return "dynamicSharedMemorySizeConstant";
}
- /// Returns dynamic value of the dynamic shared memory size. This is used
- /// if dynamic_shared_memory_size is SSA value
- static uint32_t getDynamicSharedMemorySizeDynamicValue() {
- return std::numeric_limits<uint32_t>::max();
- }
-
/// Returns a value of the dynamic shared memory size.
/// If it is a constant, it builds one
mlir::OpFoldResult getDynamicSharedMemorySizeValue(OpBuilder &b) {
- uint32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
if (getDynamicSharedMemorySizeConstant().value_or(kDynamic) == kDynamic)
return getDynamicSharedMemorySize();
return b.getI32IntegerAttr(getDynamicSharedMemorySizeConstant().value());
@@ -800,6 +797,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
let hasCanonicalizer = 1;
let hasCustomAssemblyFormat = 1;
let hasRegionVerifier = 1;
+ let hasVerifier = 1;
}
def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 4cfa64dc6a636f3..55e327cf5a20747 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -694,6 +694,14 @@ KernelDim3 LaunchOp::getBlockSizeOperandValues() {
return KernelDim3{operands[3], operands[4], operands[5]};
}
+LogicalResult LaunchOp::verify() {
+ if (getDynamicSharedMemorySize() &&
+ getDynamicSharedMemorySizeConstant().value_or(kDynamic) != kDynamic)
+ return emitOpError() << getDynamicSharedMemorySizeKeyword()
+ << " operand cannot be both SSA value and constant";
+ return success();
+}
+
LogicalResult LaunchOp::verifyRegions() {
// Kernel launch takes kNumConfigOperands leading operands for grid/block
// sizes and transforms them into kNumConfigRegionAttributes region arguments
@@ -867,8 +875,8 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
shmemAttr, parser.getBuilder().getIntegerType(32));
if (!shmemAttrResult.has_value()) {
hasDynamicSharedMemorySize = true;
- shmemAttr = parser.getBuilder().getI32IntegerAttr(
- getDynamicSharedMemorySizeDynamicValue());
+ shmemAttr =
+ parser.getBuilder().getI32IntegerAttr(gpu::LaunchOp::kDynamic);
if (parser.parseOperand(dynamicSharedMemorySize) ||
parser.resolveOperand(dynamicSharedMemorySize,
parser.getBuilder().getI32Type(),
>From d79177270e591ac245dac26d3d073bddb7dc658b Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Wed, 15 Nov 2023 16:29:57 +0100
Subject: [PATCH 7/7] Update GPUDialect.h
---
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 1364bc8d93208ec..14a1fac5fd255f3 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -15,7 +15,6 @@
#define MLIR_DIALECT_GPU_IR_GPUDIALECT_H
#include "mlir/Bytecode/BytecodeOpInterface.h"
-#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
#include "mlir/IR/Builders.h"
More information about the Mlir-commits
mailing list