[Mlir-commits] [mlir] [mlir][gpu] Allow integer attribute as `dynamic_shared_memory_size` (PR #71509)
Guray Ozen
llvmlistbot at llvm.org
Tue Nov 7 08:02:41 PST 2023
https://github.com/grypp updated https://github.com/llvm/llvm-project/pull/71509
>From 275969718d63de31e403ee29714971c6ea671357 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 11:32:08 +0100
Subject: [PATCH 1/3] [mlir][gpu] Allow integer attribute as
`dynamic_shared_memory_size` parameter of `gpu.launch`
This PR allows an integer attribute to be used as the `dynamic_shared_memory_size` parameter of `gpu.launch`. In the example IR below, `200` no longer has to be an SSA value.
```
gpu.launch blocks(..) threads(...)
dynamic_shared_memory_size 200
```
---
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h | 1 +
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 24 +++++++++++--
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 34 ++++++++++++++-----
.../GPU/Transforms/KernelOutlining.cpp | 2 +-
mlir/test/Dialect/GPU/outlining.mlir | 33 ++++++++++++++++++
5 files changed, 82 insertions(+), 12 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 14a1fac5fd255f3..06b1ea95d20339d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -17,6 +17,7 @@
#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6375d35f4311295..5bf5cbc5efe628f 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -587,7 +587,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
- Optional<I32>:$dynamicSharedMemorySize)>,
+ Optional<I32>:$dynamicSharedMemorySize,
+ OptionalAttr<SI32Attr>:$dynamicSharedMemorySizeConstant)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -693,7 +694,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
CArg<"Type", "nullptr">:$asyncTokenType,
CArg<"ValueRange", "{}">:$asyncDependencies,
CArg<"TypeRange", "{}">:$workgroupAttributions,
- CArg<"TypeRange", "{}">:$privateAttributions)>
+ CArg<"TypeRange", "{}">:$privateAttributions,
+ CArg<"IntegerAttr", "IntegerAttr()">:$dynamicSharedMemorySizeConstant)>
];
let extraClassDeclaration = [{
@@ -728,6 +730,24 @@ def GPU_LaunchOp : GPU_Op<"launch", [
/// Returns the keywords used in the custom syntax for this Op.
static StringRef getWorkgroupKeyword() { return "workgroup"; }
static StringRef getPrivateKeyword() { return "private"; }
+ static StringRef getDynamicSharedMemorySizeConstantKeyword() {
+ return "dynamicSharedMemorySizeConstant";
+ }
+
+ static int getDynamicSharedMemorySizeDynamicValue() {
+ return std::numeric_limits<int32_t>::min();
+ }
+ /// Returns a value of the dynamic shared memory size.
+ /// If it is a constant, it builds one
+ mlir::Value getDynamicSharedMemorySizeValue(OpBuilder &b) {
+ int32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
+ if (getDynamicSharedMemorySizeConstant().value_or(kDynamic) == kDynamic)
+ return getDynamicSharedMemorySize();
+ return b.create<mlir::arith::ConstantOp>(
+ getLoc(), b.getIntegerType(32),
+ b.getI32IntegerAttr(
+ getDynamicSharedMemorySizeConstant().value()));
+ }
/// Returns the number of buffers located in the workgroup memory.
unsigned getNumWorkgroupAttributions() {
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 5eb2cadc884e151..269ee7dcaec0e71 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -618,7 +618,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
Value getBlockSizeZ, Value dynamicSharedMemorySize,
Type asyncTokenType, ValueRange asyncDependencies,
TypeRange workgroupAttributions,
- TypeRange privateAttributions) {
+ TypeRange privateAttributions,
+ IntegerAttr dynamicSharedMemorySizeAttr) {
// Add a WorkGroup attribution attribute. This attribute is required to
// identify private attributions in the list of block argguments.
result.addAttribute(getNumWorkgroupAttributionsAttrName(),
@@ -634,7 +635,9 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
getBlockSizeY, getBlockSizeZ});
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
-
+ if (dynamicSharedMemorySizeAttr)
+ result.addAttribute(getDynamicSharedMemorySizeConstantKeyword(),
+ dynamicSharedMemorySizeAttr);
// Create a kernel body region with kNumConfigRegionAttributes + N memory
// attributions, where the first kNumConfigRegionAttributes arguments have
// `index` type and the rest have the same types as the data operands.
@@ -759,6 +762,10 @@ void LaunchOp::print(OpAsmPrinter &p) {
if (getDynamicSharedMemorySize())
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
<< getDynamicSharedMemorySize();
+ else if (getDynamicSharedMemorySizeConstantAttr()) {
+ p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
+ << getDynamicSharedMemorySizeConstantAttr().getSInt();
+ }
printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
@@ -768,7 +775,8 @@ void LaunchOp::print(OpAsmPrinter &p) {
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
LaunchOp::getOperandSegmentSizeAttr(),
- getNumWorkgroupAttributionsAttrName()});
+ getNumWorkgroupAttributionsAttrName(),
+ getDynamicSharedMemorySizeConstantKeyword()});
}
// Parse the size assignment blocks for blocks and threads. These have the form
@@ -854,12 +862,20 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
bool hasDynamicSharedMemorySize = false;
if (!parser.parseOptionalKeyword(
LaunchOp::getDynamicSharedMemorySizeKeyword())) {
- hasDynamicSharedMemorySize = true;
- if (parser.parseOperand(dynamicSharedMemorySize) ||
- parser.resolveOperand(dynamicSharedMemorySize,
- parser.getBuilder().getI32Type(),
- result.operands))
- return failure();
+ IntegerAttr shmemAttr;
+ OptionalParseResult shmemAttrResult = parser.parseOptionalAttribute(
+ shmemAttr, parser.getBuilder().getIntegerType(32, true));
+ if (!shmemAttrResult.has_value()) {
+ hasDynamicSharedMemorySize = true;
+ shmemAttr = parser.getBuilder().getSI32IntegerAttr(
+ getDynamicSharedMemorySizeDynamicValue());
+ if (parser.parseOperand(dynamicSharedMemorySize) ||
+ parser.resolveOperand(dynamicSharedMemorySize,
+ parser.getBuilder().getI32Type(),
+ result.operands))
+ return failure();
+ }
+ result.addAttribute(getDynamicSharedMemorySizeConstantKeyword(), shmemAttr);
}
// Create the region arguments, it has kNumConfigRegionAttributes arguments
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index b1e2f914db4cb9b..3e29fbe8cdfbbc3 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -281,7 +281,7 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(),
- launchOp.getDynamicSharedMemorySize(), operands,
+ launchOp.getDynamicSharedMemorySizeValue(builder), operands,
asyncToken ? asyncToken.getType() : nullptr,
launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 28c121a550100c2..b032a4035230990 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -372,3 +372,36 @@ func.func @launch_memory_attributions_1(%arg0 : memref<*xf32>) {
}
// CHECK-DL-LABEL: gpu.module @launch_memory_attributions_1_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
+
+
+// -----
+
+// CHECK-LABEL: func.func @dynamic_shared_memory(
+// CHECK-SAME: %[[arg0:.+]]: i32
+func.func @dynamic_shared_memory(%shmemSize : i32) {
+ %c1 = arith.constant 1 : index
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ dynamic_shared_memory_size %shmemSize
+ {
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ dynamic_shared_memory_size 200
+ {
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ {
+ gpu.terminator
+ }
+
+
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[arg0]]
+// CHECK: %[[c200:.+]] = arith.constant 200 : i32
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c200]]
+ return
+}
+
>From abe8adc1c1139985abc1b75cb9adbb0697bcc014 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 16:59:26 +0100
Subject: [PATCH 2/3] address @nicolasvasilache comments
---
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 13 +++++--------
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 6 +++---
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp | 11 ++++++++++-
mlir/test/Dialect/GPU/outlining.mlir | 8 ++++----
4 files changed, 22 insertions(+), 16 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 5bf5cbc5efe628f..17f376cccc640b5 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -588,7 +588,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Optional<I32>:$dynamicSharedMemorySize,
- OptionalAttr<SI32Attr>:$dynamicSharedMemorySizeConstant)>,
+ OptionalAttr<I32Attr>:$dynamicSharedMemorySizeConstant)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -735,18 +735,15 @@ def GPU_LaunchOp : GPU_Op<"launch", [
}
static int getDynamicSharedMemorySizeDynamicValue() {
- return std::numeric_limits<int32_t>::min();
+ return std::numeric_limits<uint32_t>::max();
}
/// Returns a value of the dynamic shared memory size.
/// If it is a constant, it builds one
- mlir::Value getDynamicSharedMemorySizeValue(OpBuilder &b) {
- int32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
+ mlir::OpFoldResult getDynamicSharedMemorySizeValue(OpBuilder &b) {
+ uint32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
if (getDynamicSharedMemorySizeConstant().value_or(kDynamic) == kDynamic)
return getDynamicSharedMemorySize();
- return b.create<mlir::arith::ConstantOp>(
- getLoc(), b.getIntegerType(32),
- b.getI32IntegerAttr(
- getDynamicSharedMemorySizeConstant().value()));
+ return b.getI32IntegerAttr(getDynamicSharedMemorySizeConstant().value());
}
/// Returns the number of buffers located in the workgroup memory.
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 269ee7dcaec0e71..4cfa64dc6a636f3 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -764,7 +764,7 @@ void LaunchOp::print(OpAsmPrinter &p) {
<< getDynamicSharedMemorySize();
else if (getDynamicSharedMemorySizeConstantAttr()) {
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
- << getDynamicSharedMemorySizeConstantAttr().getSInt();
+ << getDynamicSharedMemorySizeConstantAttr().getInt();
}
printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
@@ -864,10 +864,10 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
LaunchOp::getDynamicSharedMemorySizeKeyword())) {
IntegerAttr shmemAttr;
OptionalParseResult shmemAttrResult = parser.parseOptionalAttribute(
- shmemAttr, parser.getBuilder().getIntegerType(32, true));
+ shmemAttr, parser.getBuilder().getIntegerType(32));
if (!shmemAttrResult.has_value()) {
hasDynamicSharedMemorySize = true;
- shmemAttr = parser.getBuilder().getSI32IntegerAttr(
+ shmemAttr = parser.getBuilder().getI32IntegerAttr(
getDynamicSharedMemorySizeDynamicValue());
if (parser.parseOperand(dynamicSharedMemorySize) ||
parser.resolveOperand(dynamicSharedMemorySize,
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 3e29fbe8cdfbbc3..9f1fcf4a14e8154 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -278,10 +278,19 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
// The launch op has an optional dynamic shared memory size. If it doesn't
// exist, we use zero.
Value asyncToken = launchOp.getAsyncToken();
+ Value dynamicSharedSize;
+ OpFoldResult shmem = launchOp.getDynamicSharedMemorySizeValue(builder);
+ if (auto shmemValue = llvm::dyn_cast<Value>(shmem)) {
+ dynamicSharedSize = shmemValue;
+ } else if (auto shmemConst = getConstantIntValue(shmem)) {
+ dynamicSharedSize = builder.create<mlir::arith::ConstantOp>(
+ launchOp->getLoc(), builder.getIntegerType(32),
+ builder.getI32IntegerAttr(shmemConst.value()));
+ }
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(),
- launchOp.getDynamicSharedMemorySizeValue(builder), operands,
+ dynamicSharedSize, operands,
asyncToken ? asyncToken.getType() : nullptr,
launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index b032a4035230990..68aabd142ee34a6 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -388,7 +388,7 @@ func.func @dynamic_shared_memory(%shmemSize : i32) {
}
gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
- dynamic_shared_memory_size 200
+ dynamic_shared_memory_size 8192
{
gpu.terminator
}
@@ -398,10 +398,10 @@ func.func @dynamic_shared_memory(%shmemSize : i32) {
gpu.terminator
}
-
// CHECK: gpu.launch_func @dynamic_shared_memory_kernel::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[arg0]]
-// CHECK: %[[c200:.+]] = arith.constant 200 : i32
-// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c200]]
+// CHECK: %[[c8192:.+]] = arith.constant 8192 : i32
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c8192]]
+// CHECK: return
return
}
>From 9559fd82f25609681a137591de9ca8bd8597255d Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 17:02:27 +0100
Subject: [PATCH 3/3] format fix
---
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 06b1ea95d20339d..1364bc8d93208ec 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -15,9 +15,9 @@
#define MLIR_DIALECT_GPU_IR_GPUDIALECT_H
#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
-#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
More information about the Mlir-commits
mailing list