[Mlir-commits] [mlir] [mlir][gpu] Allow integer attribute as `dynamic_shared_memory_size` p… (PR #71509)
Guray Ozen
llvmlistbot at llvm.org
Tue Nov 7 02:34:28 PST 2023
https://github.com/grypp created https://github.com/llvm/llvm-project/pull/71509
…arameter of `gpu.launch`
This PR allows an integer attribute to be used as the `dynamic_shared_memory_size` parameter of `gpu.launch`. See the example IR below: `200` no longer has to be an SSA value.
```
gpu.launch blocks(..) threads(...)
dynamic_shared_memory_size 200
```
From 275969718d63de31e403ee29714971c6ea671357 Mon Sep 17 00:00:00 2001
From: Guray Ozen <guray.ozen at gmail.com>
Date: Tue, 7 Nov 2023 11:32:08 +0100
Subject: [PATCH] [mlir][gpu] Allow integer attribute as
`dynamic_shared_memory_size` parameter of `gpu.launch`
This PR allows an integer attribute to be used as the `dynamic_shared_memory_size` parameter of `gpu.launch`. See the example IR below: `200` no longer has to be an SSA value.
```
gpu.launch blocks(..) threads(...)
dynamic_shared_memory_size 200
```
---
mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h | 1 +
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 24 +++++++++++--
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 34 ++++++++++++++-----
.../GPU/Transforms/KernelOutlining.cpp | 2 +-
mlir/test/Dialect/GPU/outlining.mlir | 33 ++++++++++++++++++
5 files changed, 82 insertions(+), 12 deletions(-)
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 14a1fac5fd255f3..06b1ea95d20339d 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -17,6 +17,7 @@
#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 6375d35f4311295..5bf5cbc5efe628f 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -587,7 +587,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
- Optional<I32>:$dynamicSharedMemorySize)>,
+ Optional<I32>:$dynamicSharedMemorySize,
+ OptionalAttr<SI32Attr>:$dynamicSharedMemorySizeConstant)>,
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
let summary = "GPU kernel launch operation";
@@ -693,7 +694,8 @@ def GPU_LaunchOp : GPU_Op<"launch", [
CArg<"Type", "nullptr">:$asyncTokenType,
CArg<"ValueRange", "{}">:$asyncDependencies,
CArg<"TypeRange", "{}">:$workgroupAttributions,
- CArg<"TypeRange", "{}">:$privateAttributions)>
+ CArg<"TypeRange", "{}">:$privateAttributions,
+ CArg<"IntegerAttr", "IntegerAttr()">:$dynamicSharedMemorySizeConstant)>
];
let extraClassDeclaration = [{
@@ -728,6 +730,24 @@ def GPU_LaunchOp : GPU_Op<"launch", [
/// Returns the keywords used in the custom syntax for this Op.
static StringRef getWorkgroupKeyword() { return "workgroup"; }
static StringRef getPrivateKeyword() { return "private"; }
+ static StringRef getDynamicSharedMemorySizeConstantKeyword() {
+ return "dynamicSharedMemorySizeConstant";
+ }
+
+ static int getDynamicSharedMemorySizeDynamicValue() {
+ return std::numeric_limits<int32_t>::min();
+ }
+ /// Returns the dynamic shared memory size as an SSA value. If the size is a
+ /// constant attribute, builds an `arith.constant` op holding that constant.
+ mlir::Value getDynamicSharedMemorySizeValue(OpBuilder &b) {
+ int32_t kDynamic = getDynamicSharedMemorySizeDynamicValue();
+ if (getDynamicSharedMemorySizeConstant().value_or(kDynamic) == kDynamic)
+ return getDynamicSharedMemorySize();
+ return b.create<mlir::arith::ConstantOp>(
+ getLoc(), b.getIntegerType(32),
+ b.getI32IntegerAttr(
+ getDynamicSharedMemorySizeConstant().value()));
+ }
/// Returns the number of buffers located in the workgroup memory.
unsigned getNumWorkgroupAttributions() {
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 5eb2cadc884e151..269ee7dcaec0e71 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -618,7 +618,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
Value getBlockSizeZ, Value dynamicSharedMemorySize,
Type asyncTokenType, ValueRange asyncDependencies,
TypeRange workgroupAttributions,
- TypeRange privateAttributions) {
+ TypeRange privateAttributions,
+ IntegerAttr dynamicSharedMemorySizeAttr) {
// Add a WorkGroup attribution attribute. This attribute is required to
// identify private attributions in the list of block argguments.
result.addAttribute(getNumWorkgroupAttributionsAttrName(),
@@ -634,7 +635,9 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
getBlockSizeY, getBlockSizeZ});
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
-
+ if (dynamicSharedMemorySizeAttr)
+ result.addAttribute(getDynamicSharedMemorySizeConstantKeyword(),
+ dynamicSharedMemorySizeAttr);
// Create a kernel body region with kNumConfigRegionAttributes + N memory
// attributions, where the first kNumConfigRegionAttributes arguments have
// `index` type and the rest have the same types as the data operands.
@@ -759,6 +762,10 @@ void LaunchOp::print(OpAsmPrinter &p) {
if (getDynamicSharedMemorySize())
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
<< getDynamicSharedMemorySize();
+ else if (getDynamicSharedMemorySizeConstantAttr()) {
+ p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
+ << getDynamicSharedMemorySizeConstantAttr().getSInt();
+ }
printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
@@ -768,7 +775,8 @@ void LaunchOp::print(OpAsmPrinter &p) {
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
LaunchOp::getOperandSegmentSizeAttr(),
- getNumWorkgroupAttributionsAttrName()});
+ getNumWorkgroupAttributionsAttrName(),
+ getDynamicSharedMemorySizeConstantKeyword()});
}
// Parse the size assignment blocks for blocks and threads. These have the form
@@ -854,12 +862,20 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
bool hasDynamicSharedMemorySize = false;
if (!parser.parseOptionalKeyword(
LaunchOp::getDynamicSharedMemorySizeKeyword())) {
- hasDynamicSharedMemorySize = true;
- if (parser.parseOperand(dynamicSharedMemorySize) ||
- parser.resolveOperand(dynamicSharedMemorySize,
- parser.getBuilder().getI32Type(),
- result.operands))
- return failure();
+ IntegerAttr shmemAttr;
+ OptionalParseResult shmemAttrResult = parser.parseOptionalAttribute(
+ shmemAttr, parser.getBuilder().getIntegerType(32, true));
+ if (!shmemAttrResult.has_value()) {
+ hasDynamicSharedMemorySize = true;
+ shmemAttr = parser.getBuilder().getSI32IntegerAttr(
+ getDynamicSharedMemorySizeDynamicValue());
+ if (parser.parseOperand(dynamicSharedMemorySize) ||
+ parser.resolveOperand(dynamicSharedMemorySize,
+ parser.getBuilder().getI32Type(),
+ result.operands))
+ return failure();
+ }
+ result.addAttribute(getDynamicSharedMemorySizeConstantKeyword(), shmemAttr);
}
// Create the region arguments, it has kNumConfigRegionAttributes arguments
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index b1e2f914db4cb9b..3e29fbe8cdfbbc3 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -281,7 +281,7 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
launchOp.getBlockSizeOperandValues(),
- launchOp.getDynamicSharedMemorySize(), operands,
+ launchOp.getDynamicSharedMemorySizeValue(builder), operands,
asyncToken ? asyncToken.getType() : nullptr,
launchOp.getAsyncDependencies());
launchOp.replaceAllUsesWith(launchFunc);
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 28c121a550100c2..b032a4035230990 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -372,3 +372,36 @@ func.func @launch_memory_attributions_1(%arg0 : memref<*xf32>) {
}
// CHECK-DL-LABEL: gpu.module @launch_memory_attributions_1_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
+
+
+// -----
+
+// CHECK-LABEL: func.func @dynamic_shared_memory(
+// CHECK-SAME: %[[arg0:.+]]: i32
+func.func @dynamic_shared_memory(%shmemSize : i32) {
+ %c1 = arith.constant 1 : index
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ dynamic_shared_memory_size %shmemSize
+ {
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ dynamic_shared_memory_size 200
+ {
+ gpu.terminator
+ }
+ gpu.launch blocks(%bx, %by, %bz) in (%sbx = %c1, %sby = %c1, %sbz = %c1)
+ threads(%tx, %ty, %tz) in (%stx = %c1, %sty = %c1, %stz = %c1)
+ {
+ gpu.terminator
+ }
+
+
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[arg0]]
+// CHECK: %[[c200:.+]] = arith.constant 200 : i32
+// CHECK: gpu.launch_func @dynamic_shared_memory_kernel_0::@dynamic_shared_memory_kernel blocks in (%{{.+}}, %{{.+}}, %{{.+}}) threads in (%{{.+}}, %{{.+}}, %{{.+}}) dynamic_shared_memory_size %[[c200]]
+ return
+}
+
More information about the Mlir-commits
mailing list