[Mlir-commits] [mlir] 54e96f4 - [mlir][GPUDialect] Implement memory attributions for LaunchOp
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Apr 26 15:55:22 PDT 2023
Author: Fabian Mora
Date: 2023-04-26T17:53:18-05:00
New Revision: 54e96f4f975ec05f44d998cd13032b1b55dad8ea
URL: https://github.com/llvm/llvm-project/commit/54e96f4f975ec05f44d998cd13032b1b55dad8ea
DIFF: https://github.com/llvm/llvm-project/commit/54e96f4f975ec05f44d998cd13032b1b55dad8ea.diff
LOG: [mlir][GPUDialect] Implement memory attributions for LaunchOp
Currently, memory attributions are not supported for gpu::LaunchOp. This patch implements memory attributions for gpu::LaunchOp and modifies the KernelOutlining pass to make the attributions available in GPUFuncOp.
Reviewed By: makslevental
Differential Revision: https://reviews.llvm.org/D147809
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
mlir/test/Dialect/GPU/outlining.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 860e20720afd9..e67adbc73f929 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -536,12 +536,14 @@ def GPU_LaunchOp : GPU_Op<"launch", [
to the amount of dynamic shared memory a kernel's workgroup should be
allocated; when this operand is not present, a zero size is assumed.
- The body region has _twelve_ arguments, grouped as follows:
+ The body region has at least _twelve_ arguments, grouped as follows:
- three arguments that contain block identifiers along x,y,z dimensions;
- three arguments that contain thread identifiers along x,y,z dimensions;
- operands of the `gpu.launch` operation as is (i.e. the operands for
grid and block sizes).
+ - a variadic number of Workgroup memory attributions.
+ - a variadic number of Private memory attributions.
Syntax:
@@ -550,8 +552,11 @@ def GPU_LaunchOp : GPU_Op<"launch", [
`blocks` `(` ssa-id-list `)` `in` ssa-reassignment
`threads` `(` ssa-id-list `)` `in` ssa-reassignment
(dynamic_shared_memory_size ssa-use)?
+ memory-attribution
region attr-dict?
ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
+ memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
+ (`private` `(` ssa-id-and-type-list `)`)?
```
Example:
@@ -582,6 +587,18 @@ def GPU_LaunchOp : GPU_Op<"launch", [
"some_op"(%bx, %tx) : (index, index) -> ()
%3 = "memref.load"(%val1, %bx) : (memref<?xf32, 1>, index) -> f32
}
+
+ // Launch with memory attributions.
+ gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %0, %sz_by = %1, %sz_bz = %2)
+ threads(%tx, %ty, %tz) in (%sz_tx = %3, %sz_ty = %4, %sz_tz = %5)
+ workgroup(%workgroup: memref<32xf32, 3>)
+ private(%private: memref<1xf32, 5>) {
+ // Block and thread identifiers, as well as block/grid sizes are
+ // immediately usable inside body region.
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ // %workgroup is the workgroup memory attribution declared on this gpu.launch.
+ %42 = load %workgroup[%bx] : memref<32xf32, 3>
+ }
```
Rationale: using operation/block arguments gives analyses a clear way of
@@ -601,7 +618,9 @@ def GPU_LaunchOp : GPU_Op<"launch", [
"Value":$blockSizeZ,
CArg<"Value", "nullptr">:$dynamicSharedMemorySize,
CArg<"Type", "nullptr">:$asyncTokenType,
- CArg<"ValueRange", "{}">:$asyncDependencies)>
+ CArg<"ValueRange", "{}">:$asyncDependencies,
+ CArg<"TypeRange", "{}">:$workgroupAttributions,
+ CArg<"TypeRange", "{}">:$privateAttributions)>
];
let extraClassDeclaration = [{
@@ -632,6 +651,57 @@ def GPU_LaunchOp : GPU_Op<"launch", [
/// The number of region attributes containing the launch configuration,
/// placed in the leading positions of the argument list.
static constexpr unsigned kNumConfigRegionAttributes = 12;
+
+ /// Returns the keywords used in the custom syntax for this Op.
+ static StringRef getWorkgroupKeyword() { return "workgroup"; }
+ static StringRef getPrivateKeyword() { return "private"; }
+
+ /// Returns the number of buffers located in the workgroup memory.
+ unsigned getNumWorkgroupAttributions() {
+ auto attr = (*this)->getAttrOfType<IntegerAttr>(
+ getNumWorkgroupAttributionsAttrName());
+ return attr ? attr.getInt() : 0;
+ }
+
+ /// Returns a list of block arguments that correspond to buffers located in
+ /// the workgroup memory.
+ ArrayRef<BlockArgument> getWorkgroupAttributions() {
+ auto begin =
+ std::next(getBody().args_begin(), kNumConfigRegionAttributes);
+ auto end = std::next(begin, getNumWorkgroupAttributions());
+ return {begin, end};
+ }
+
+ /// Adds a new block argument that corresponds to buffers located in
+ /// workgroup memory.
+ BlockArgument addWorkgroupAttribution(Type type, Location loc);
+
+ /// Returns the number of buffers located in the private memory.
+ unsigned getNumPrivateAttributions() {
+ return getBody().getNumArguments() - kNumConfigRegionAttributes -
+ getNumWorkgroupAttributions();
+ }
+
+ /// Returns a list of block arguments that correspond to buffers located in
+ /// the private memory.
+ ArrayRef<BlockArgument> getPrivateAttributions() {
+ // Buffers on the private memory always come after buffers on the workgroup
+ // memory.
+ auto begin =
+ std::next(getBody().args_begin(),
+ kNumConfigRegionAttributes + getNumWorkgroupAttributions());
+ return {begin, getBody().args_end()};
+ }
+
+ /// Adds a new block argument that corresponds to buffers located in
+ /// private memory.
+ BlockArgument addPrivateAttribution(Type type, Location loc);
+
+ /// Returns the name of the attribute containing the number of buffers
+ /// located in the workgroup memory.
+ static StringRef getNumWorkgroupAttributionsAttrName() {
+ return "workgroup_attributions";
+ }
}];
let hasCanonicalizer = 1;
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index f9d929d163445..3ce6083c1f009 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -332,6 +332,60 @@ static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op,
printer << ']';
}
+// GPU Memory attributions functions shared by LaunchOp and GPUFuncOp.
+/// Parses a GPU function memory attribution.
+///
+/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
+/// (`private` `(` ssa-id-and-type-list `)`)?
+///
+/// Note that this function parses only one of the two similar parts, with the
+/// keyword provided as argument.
+static ParseResult
+parseAttributions(OpAsmParser &parser, StringRef keyword,
+ SmallVectorImpl<OpAsmParser::Argument> &args) {
+ // If we could not parse the keyword, just assume empty list and succeed.
+ if (failed(parser.parseOptionalKeyword(keyword)))
+ return success();
+
+ return parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
+ /*allowType=*/true);
+}
+
+/// Prints a GPU function memory attribution.
+static void printAttributions(OpAsmPrinter &p, StringRef keyword,
+ ArrayRef<BlockArgument> values) {
+ if (values.empty())
+ return;
+
+ p << ' ' << keyword << '(';
+ llvm::interleaveComma(
+ values, p, [&p](BlockArgument v) { p << v << " : " << v.getType(); });
+ p << ')';
+}
+
+/// Verifies a GPU function memory attribution.
+static LogicalResult verifyAttributions(Operation *op,
+ ArrayRef<BlockArgument> attributions,
+ gpu::AddressSpace memorySpace) {
+ for (Value v : attributions) {
+ auto type = v.getType().dyn_cast<MemRefType>();
+ if (!type)
+ return op->emitOpError() << "expected memref type in attribution";
+
+ // We can only verify the address space if it hasn't already been lowered
+ // from the AddressSpaceAttr to a target-specific numeric value.
+ auto addressSpace =
+ type.getMemorySpace().dyn_cast_or_null<gpu::AddressSpaceAttr>();
+ if (!addressSpace)
+ continue;
+ if (addressSpace.getValue() != memorySpace)
+ return op->emitOpError()
+ << "expected memory space " << stringifyAddressSpace(memorySpace)
+ << " in attribution";
+ }
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// AllReduceOp
//===----------------------------------------------------------------------===//
@@ -439,7 +493,15 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
Value gridSizeX, Value gridSizeY, Value gridSizeZ,
Value getBlockSizeX, Value getBlockSizeY,
Value getBlockSizeZ, Value dynamicSharedMemorySize,
- Type asyncTokenType, ValueRange asyncDependencies) {
+ Type asyncTokenType, ValueRange asyncDependencies,
+ TypeRange workgroupAttributions,
+ TypeRange privateAttributions) {
+ // Add a WorkGroup attribution attribute. This attribute is required to
+ // identify private attributions in the list of block arguments.
+ result.addAttribute(getNumWorkgroupAttributionsAttrName(),
+ builder.getI64IntegerAttr(workgroupAttributions.size()));
+
+ // Add Op operands.
result.addOperands(asyncDependencies);
if (asyncTokenType)
result.types.push_back(builder.getType<AsyncTokenType>());
@@ -450,14 +512,21 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
if (dynamicSharedMemorySize)
result.addOperands(dynamicSharedMemorySize);
- // Create a kernel body region with kNumConfigRegionAttributes + N arguments,
- // where the first kNumConfigRegionAttributes arguments have `index` type and
- // the rest have the same types as the data operands.
+ // Create a kernel body region with kNumConfigRegionAttributes + N arguments,
+ // where the first kNumConfigRegionAttributes arguments have `index` type and
+ // the remaining N are the workgroup followed by the private attributions.
Region *kernelRegion = result.addRegion();
Block *body = new Block();
+ // TODO: Allow passing in proper locations here.
for (unsigned i = 0; i < kNumConfigRegionAttributes; ++i)
body->addArgument(builder.getIndexType(), result.location);
+ // Add WorkGroup & Private attributions to the region arguments.
+ for (Type argTy : workgroupAttributions)
+ body->addArgument(argTy, result.location);
+ for (Type argTy : privateAttributions)
+ body->addArgument(argTy, result.location);
kernelRegion->push_back(body);
+ // Fill OperandSegmentSize Attribute.
SmallVector<int32_t, 8> segmentSizes(8, 1);
segmentSizes.front() = asyncDependencies.size();
segmentSizes.back() = dynamicSharedMemorySize ? 1 : 0;
@@ -504,13 +573,18 @@ LogicalResult LaunchOp::verifyRegions() {
// sizes and transforms them into kNumConfigRegionAttributes region arguments
// for block/thread identifiers and grid/block sizes.
if (!getBody().empty()) {
- if (getBody().getNumArguments() !=
- LaunchOp::kNumConfigOperands + getNumOperands() -
- (getDynamicSharedMemorySize() ? 1 : 0) -
- getAsyncDependencies().size())
+ if (getBody().getNumArguments() <
+ kNumConfigRegionAttributes + getNumWorkgroupAttributions())
return emitOpError("unexpected number of region arguments");
}
+ // Verify Attributions Address Spaces.
+ if (failed(verifyAttributions(getOperation(), getWorkgroupAttributions(),
+ GPUDialect::getWorkgroupAddressSpace())) ||
+ failed(verifyAttributions(getOperation(), getPrivateAttributions(),
+ GPUDialect::getPrivateAddressSpace())))
+ return failure();
+
// Block terminators without successors are expected to exit the kernel region
// and must be `gpu.terminator`.
for (Block &block : getBody()) {
@@ -563,10 +637,15 @@ void LaunchOp::print(OpAsmPrinter &p) {
p << ' ' << getDynamicSharedMemorySizeKeyword() << ' '
<< getDynamicSharedMemorySize();
+ printAttributions(p, getWorkgroupKeyword(), getWorkgroupAttributions());
+ printAttributions(p, getPrivateKeyword(), getPrivateAttributions());
+
p << ' ';
+
p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
p.printOptionalAttrDict((*this)->getAttrs(), /*elidedAttrs=*/{
- LaunchOp::getOperandSegmentSizeAttr()});
+ LaunchOp::getOperandSegmentSizeAttr(),
+ getNumWorkgroupAttributionsAttrName()});
}
// Parse the size assignment blocks for blocks and threads. These have the form
@@ -601,8 +680,9 @@ parseSizeAssignment(OpAsmParser &parser,
/// Parses a Launch operation.
/// operation ::= `gpu.launch` (`async` `[` ssa-id-list `]`)?
-// `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
+/// `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
/// `threads` `(` ssa-id-list `)` `in` ssa-reassignment
+/// memory-attribution
/// region attr-dict?
/// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
@@ -659,9 +739,12 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
return failure();
}
- // Introduce the body region and parse it. The region has
- // kNumConfigRegionAttributes arguments that correspond to
- // block/thread identifiers and grid/block sizes, all of the `index` type.
+ // Create the region arguments. The region has kNumConfigRegionAttributes
+ // arguments that correspond to block/thread identifiers and grid/block sizes,
+ // all having `index` type, followed by a variadic number of WorkGroup
+ // Attributions and a variadic number of Private Attributions. The number of
+ // WorkGroup Attributions is stored in the attr with name:
+ // LaunchOp::getNumWorkgroupAttributionsAttrName().
Type index = parser.getBuilder().getIndexType();
SmallVector<Type, LaunchOp::kNumConfigRegionAttributes> dataTypes(
LaunchOp::kNumConfigRegionAttributes, index);
@@ -674,6 +757,27 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
regionArguments.push_back(arg);
}
+ Builder &builder = parser.getBuilder();
+ // Parse workgroup memory attributions.
+ if (failed(parseAttributions(parser, LaunchOp::getWorkgroupKeyword(),
+ regionArguments)))
+ return failure();
+
+ // Store the number of operands we just parsed as the number of workgroup
+ // memory attributions.
+ unsigned numWorkgroupAttrs =
+ regionArguments.size() - LaunchOp::kNumConfigRegionAttributes;
+ result.addAttribute(LaunchOp::getNumWorkgroupAttributionsAttrName(),
+ builder.getI64IntegerAttr(numWorkgroupAttrs));
+
+ // Parse private memory attributions.
+ if (failed(parseAttributions(parser, LaunchOp::getPrivateKeyword(),
+ regionArguments)))
+ return failure();
+
+ // Introduce the body region and parse it. Its arguments are the
+ // kNumConfigRegionAttributes `index`-typed block/thread identifiers and
+ // grid/block sizes, followed by the memory attributions parsed above.
Region *body = result.addRegion();
if (parser.parseRegion(*body, regionArguments) ||
parser.parseOptionalAttrDict(result.attributes))
@@ -729,6 +833,25 @@ void LaunchOp::getCanonicalizationPatterns(RewritePatternSet &rewrites,
rewrites.add<FoldLaunchArguments>(context);
}
+/// Adds a new block argument that corresponds to buffers located in
+/// workgroup memory.
+BlockArgument LaunchOp::addWorkgroupAttribution(Type type, Location loc) {
+ auto attrName = getNumWorkgroupAttributionsAttrName();
+ auto attr = (*this)->getAttrOfType<IntegerAttr>(attrName);
+ (*this)->setAttr(attrName,
+ IntegerAttr::get(attr.getType(), attr.getValue() + 1));
+ return getBody().insertArgument(
+ LaunchOp::kNumConfigRegionAttributes + attr.getInt(), type, loc);
+}
+
+/// Adds a new block argument that corresponds to buffers located in
+/// private memory.
+BlockArgument LaunchOp::addPrivateAttribution(Type type, Location loc) {
+ // Buffers on the private memory always come after buffers on the workgroup
+ // memory.
+ return getBody().addArgument(type, loc);
+}
+
//===----------------------------------------------------------------------===//
// LaunchFuncOp
//===----------------------------------------------------------------------===//
@@ -894,24 +1017,6 @@ void GPUFuncOp::build(OpBuilder &builder, OperationState &result,
body->getBlocks().push_back(entryBlock);
}
-/// Parses a GPU function memory attribution.
-///
-/// memory-attribution ::= (`workgroup` `(` ssa-id-and-type-list `)`)?
-/// (`private` `(` ssa-id-and-type-list `)`)?
-///
-/// Note that this function parses only one of the two similar parts, with the
-/// keyword provided as argument.
-static ParseResult
-parseAttributions(OpAsmParser &parser, StringRef keyword,
- SmallVectorImpl<OpAsmParser::Argument> &args) {
- // If we could not parse the keyword, just assume empty list and succeed.
- if (failed(parser.parseOptionalKeyword(keyword)))
- return success();
-
- return parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
- /*allowType=*/true);
-}
-
/// Parses a GPU function.
///
/// <operation> ::= `gpu.func` symbol-ref-id `(` argument-list `)`
@@ -985,17 +1090,6 @@ ParseResult GPUFuncOp::parse(OpAsmParser &parser, OperationState &result) {
return parser.parseRegion(*body, entryArgs);
}
-static void printAttributions(OpAsmPrinter &p, StringRef keyword,
- ArrayRef<BlockArgument> values) {
- if (values.empty())
- return;
-
- p << ' ' << keyword << '(';
- llvm::interleaveComma(
- values, p, [&p](BlockArgument v) { p << v << " : " << v.getType(); });
- p << ')';
-}
-
void GPUFuncOp::print(OpAsmPrinter &p) {
p << ' ';
p.printSymbolName(getName());
@@ -1026,28 +1120,6 @@ LogicalResult GPUFuncOp::verifyType() {
return success();
}
-static LogicalResult verifyAttributions(Operation *op,
- ArrayRef<BlockArgument> attributions,
- gpu::AddressSpace memorySpace) {
- for (Value v : attributions) {
- auto type = v.getType().dyn_cast<MemRefType>();
- if (!type)
- return op->emitOpError() << "expected memref type in attribution";
-
- // We can only verify the address space if it hasn't already been lowered
- // from the AddressSpaceAttr to a target-specific numeric value.
- auto addressSpace =
- type.getMemorySpace().dyn_cast_or_null<gpu::AddressSpaceAttr>();
- if (!addressSpace)
- continue;
- if (addressSpace.getValue() != memorySpace)
- return op->emitOpError()
- << "expected memory space " << stringifyAddressSpace(memorySpace)
- << " in attribution";
- }
- return success();
-}
-
/// Verifies the body of the function.
LogicalResult GPUFuncOp::verifyBody() {
if (empty())
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 8abf75993a63d..91c1c763f070d 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -190,7 +190,10 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
}
FunctionType type =
FunctionType::get(launchOp.getContext(), kernelOperandTypes, {});
- auto outlinedFunc = builder.create<gpu::GPUFuncOp>(loc, kernelFnName, type);
+ auto outlinedFunc = builder.create<gpu::GPUFuncOp>(
+ loc, kernelFnName, type,
+ TypeRange(ValueRange(launchOp.getWorkgroupAttributions())),
+ TypeRange(ValueRange(launchOp.getPrivateAttributions())));
outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
builder.getUnitAttr());
@@ -213,6 +216,16 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
Region &outlinedFuncBody = outlinedFunc.getBody();
injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map);
+ // Map memory attributions from the LaunchOp to the GPUFuncOp attributions.
+ for (const auto &[launchArg, funcArg] :
+ llvm::zip(launchOp.getWorkgroupAttributions(),
+ outlinedFunc.getWorkgroupAttributions()))
+ map.map(launchArg, funcArg);
+ for (const auto &[launchArg, funcArg] :
+ llvm::zip(launchOp.getPrivateAttributions(),
+ outlinedFunc.getPrivateAttributions()))
+ map.map(launchArg, funcArg);
+
// Map arguments from gpu.launch region to the arguments of the gpu.func
// operation.
Block &entryBlock = outlinedFuncBody.front();
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 422e0c154dd47..ca776968d998b 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -310,3 +310,65 @@ func.func @non_constant_launches(%arg0 : index) {
}
// CHECK-DL-LABEL: gpu.module @non_constant_launches_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
+
+// CHECK: module attributes {gpu.container_module}
+
+// -----
+
+// This test checks memory attributions for gpu.launch, using both workgroup and private attributions.
+// CHECK-LABEL: func @launch_memory_attributions_0()
+func.func @launch_memory_attributions_0() {
+ %1 = "op"() : () -> (memref<?xf32, 1>)
+ %128 = arith.constant 128 : index
+
+ // CHECK: gpu.launch_func @launch_memory_attributions_0_kernel::@launch_memory_attributions_0_kernel
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %128, %grid_y = %128,
+ %grid_z = %128)
+ threads(%tx, %ty, %tz) in (%block_x = %128, %block_y = %128,
+ %block_z = %128)
+ workgroup(%shared: memref<42xf32, 3>)
+ private(%priv0: memref<2xf32, 5>, %priv1: memref<1xf32, 5>) {
+ "some_op"(%bx, %block_x) : (index, index) -> ()
+ %42 = memref.load %1[%tx] : memref<?xf32, 1>
+ %43 = memref.load %shared[%tx] : memref<42xf32, 3>
+ %44 = memref.load %priv1[%tx] : memref<1xf32, 5>
+ gpu.terminator
+ }
+ return
+}
+
+// CHECK-DL-LABEL: gpu.module @launch_memory_attributions_0_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
+
+// CHECK-LABEL: gpu.module @launch_memory_attributions_0_kernel
+// CHECK-NEXT: gpu.func @launch_memory_attributions_0_kernel
+// CHECK-SAME: workgroup(%[[KERNEL_ARG1:.*]] : memref<42xf32, 3>)
+// CHECK-SAME: private(%[[KERNEL_ARG2:.*]] : memref<2xf32, 5>, %[[KERNEL_ARG3:.*]] : memref<1xf32, 5>)
+// CHECK: %[[TID:.*]] = gpu.thread_id x
+// CHECK: = memref.load %[[KERNEL_ARG1]][%[[TID]]] : memref<42xf32, 3>
+// CHECK-NEXT: = memref.load %[[KERNEL_ARG3]][%[[TID]]] : memref<1xf32, 5>
+
+// -----
+
+// This test checks correctness of private attributions in the absence of workgroup attributions.
+// CHECK-LABEL: @launch_memory_attributions_1
+func.func @launch_memory_attributions_1(%arg0 : memref<*xf32>) {
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %d = memref.dim %arg0, %c2 : memref<*xf32>
+ // CHECK: gpu.func {{.*}} private(%[[KERNEL_ARG:.*]] : memref<3xf32, 5>) {{.*}} {
+ // CHECK: %[[C2:.*]] = arith.constant 2 : index
+ // CHECK: = memref.load %[[KERNEL_ARG]][%[[C2]]] : memref<3xf32, 5>
+ // CHECK: gpu.return
+ // CHECK: }
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
+ %grid_z = %c1)
+ threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1,
+ %block_z = %c1)
+ private(%priv0: memref<3xf32, 5>) {
+ %42 = memref.load %priv0[%c2] : memref<3xf32, 5>
+ gpu.terminator
+ }
+ return
+}
+
+// CHECK-DL-LABEL: gpu.module @launch_memory_attributions_1_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
More information about the Mlir-commits
mailing list