[Mlir-commits] [mlir] 598c5dd - [mlir][bufferize] Support fully dynamic layout maps in BufferResultsToOutParams
Matthias Springer
llvmlistbot at llvm.org
Mon May 23 09:47:20 PDT 2022
Author: Matthias Springer
Date: 2022-05-23T18:38:22+02:00
New Revision: 598c5ddba6b0fd1e7226ffd4dc34e44ec7c7c513
URL: https://github.com/llvm/llvm-project/commit/598c5ddba6b0fd1e7226ffd4dc34e44ec7c7c513
DIFF: https://github.com/llvm/llvm-project/commit/598c5ddba6b0fd1e7226ffd4dc34e44ec7c7c513.diff
LOG: [mlir][bufferize] Support fully dynamic layout maps in BufferResultsToOutParams
Also fixes the integration of the pass into One-Shot Bufferize and adds additional test cases.
BufferResultsToOutParams can be used with "identity-layout-map" and "fully-dynamic-layout-map". "infer-layout-map" is not supported.
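For example, with "fully-dynamic-layout-map", promoting a memref result to an
out param works roughly as sketched below (#map stands for the fully dynamic
strided layout affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>; function bodies are
elided):

// Before promotion: the result carries the fully dynamic layout.
func.func @f() -> memref<5xf32, #map>

// After promotion: the result becomes an out param. Only buffers with static
// identity layout can be allocated, so callers allocate such a buffer and
// cast it to the fully dynamic layout before the call.
func.func @f(%out: memref<5xf32, #map>)
%alloc = memref.alloc() : memref<5xf32>
%casted = memref.cast %alloc : memref<5xf32> to memref<5xf32, #map>
call @f(%casted) : (memref<5xf32, #map>) -> ()

This is the pattern that the updated test cases below check for.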
Differential Revision: https://reviews.llvm.org/D125636
Added:
Modified:
mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index c9018280ea23f..4e189b4fd89d6 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -15,20 +15,50 @@
using namespace mlir;
+/// Return `true` if the given MemRef type has a fully dynamic layout.
+static bool hasFullyDynamicLayoutMap(MemRefType type) {
+  int64_t offset;
+  SmallVector<int64_t, 4> strides;
+  if (failed(getStridesAndOffset(type, strides, offset)))
+    return false;
+  if (!llvm::all_of(strides, [](int64_t stride) {
+        return ShapedType::isDynamicStrideOrOffset(stride);
+      }))
+    return false;
+  if (!ShapedType::isDynamicStrideOrOffset(offset))
+    return false;
+  return true;
+}
+
+/// Return `true` if the given MemRef type has a static identity layout (i.e.,
+/// no layout).
+static bool hasStaticIdentityLayout(MemRefType type) {
+  return type.getLayout().isIdentity();
+}
+
// Updates the func op and entry block.
//
// Any args appended to the entry block are added to `appendedEntryArgs`.
-static void updateFuncOp(func::FuncOp func,
-                         SmallVectorImpl<BlockArgument> &appendedEntryArgs) {
+static LogicalResult
+updateFuncOp(func::FuncOp func,
+             SmallVectorImpl<BlockArgument> &appendedEntryArgs) {
  auto functionType = func.getFunctionType();
  // Collect information about the results that will become appended arguments.
  SmallVector<Type, 6> erasedResultTypes;
  BitVector erasedResultIndices(functionType.getNumResults());
  for (const auto &resultType : llvm::enumerate(functionType.getResults())) {
-    if (resultType.value().isa<BaseMemRefType>()) {
+    if (auto memrefType = resultType.value().dyn_cast<MemRefType>()) {
+      if (!hasStaticIdentityLayout(memrefType) &&
+          !hasFullyDynamicLayoutMap(memrefType)) {
+        // Only buffers with static identity layout can be allocated. These
+        // can be cast to memrefs with fully dynamic layout map. Other layout
+        // maps are not supported.
+        return func->emitError()
+               << "cannot create out param for result with unsupported layout";
+      }
      erasedResultIndices.set(resultType.index());
-      erasedResultTypes.push_back(resultType.value());
+      erasedResultTypes.push_back(memrefType);
    }
  }
@@ -51,10 +81,12 @@ static void updateFuncOp(func::FuncOp func,
  // Add the new arguments to the entry block if the function is not external.
  if (func.isExternal())
-    return;
+    return success();
  Location loc = func.getLoc();
  for (Type type : erasedResultTypes)
    appendedEntryArgs.push_back(func.front().addArgument(type, loc));
+
+  return success();
}
// Updates all ReturnOps in the scope of the given func::FuncOp by either
@@ -66,7 +98,7 @@ static void updateReturnOps(func::FuncOp func,
    SmallVector<Value, 6> copyIntoOutParams;
    SmallVector<Value, 6> keepAsReturnOperands;
    for (Value operand : op.getOperands()) {
-      if (operand.getType().isa<BaseMemRefType>())
+      if (operand.getType().isa<MemRefType>())
        copyIntoOutParams.push_back(operand);
      else
        keepAsReturnOperands.push_back(operand);
    }
@@ -88,7 +120,7 @@ static LogicalResult updateCalls(ModuleOp module) {
    SmallVector<Value, 6> replaceWithNewCallResults;
    SmallVector<Value, 6> replaceWithOutParams;
    for (OpResult result : op.getResults()) {
-      if (result.getType().isa<BaseMemRefType>())
+      if (result.getType().isa<MemRefType>())
        replaceWithOutParams.push_back(result);
      else
        replaceWithNewCallResults.push_back(result);
    }
@@ -96,14 +128,24 @@ static LogicalResult updateCalls(ModuleOp module) {
    SmallVector<Value, 6> outParams;
    OpBuilder builder(op);
    for (Value memref : replaceWithOutParams) {
-      if (!memref.getType().cast<BaseMemRefType>().hasStaticShape()) {
+      if (!memref.getType().cast<MemRefType>().hasStaticShape()) {
        op.emitError()
            << "cannot create out param for dynamically shaped result";
        didFail = true;
        return;
      }
-      Value outParam = builder.create<memref::AllocOp>(
-          op.getLoc(), memref.getType().cast<MemRefType>());
+      auto memrefType = memref.getType().cast<MemRefType>();
+      auto allocType =
+          MemRefType::get(memrefType.getShape(), memrefType.getElementType(),
+                          AffineMap(), memrefType.getMemorySpaceAsInt());
+      Value outParam = builder.create<memref::AllocOp>(op.getLoc(), allocType);
+      if (!hasStaticIdentityLayout(memrefType)) {
+        // Layout maps are already checked in `updateFuncOp`.
+        assert(hasFullyDynamicLayoutMap(memrefType) &&
+               "layout map not supported");
+        outParam =
+            builder.create<memref::CastOp>(op.getLoc(), memrefType, outParam);
+      }
      memref.replaceAllUsesWith(outParam);
      outParams.push_back(outParam);
    }
@@ -126,7 +168,8 @@ LogicalResult
mlir::bufferization::promoteBufferResultsToOutParams(ModuleOp module) {
  for (auto func : module.getOps<func::FuncOp>()) {
    SmallVector<BlockArgument, 6> appendedEntryArgs;
-    updateFuncOp(func, appendedEntryArgs);
+    if (failed(updateFuncOp(func, appendedEntryArgs)))
+      return failure();
    if (func.isExternal())
      continue;
    updateReturnOps(func, appendedEntryArgs);
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index c4ce22c4539fa..9fee93edee1a0 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -307,19 +307,18 @@ bufferization::finalizeBuffers(Operation *op,
                              &hasLeakingAllocs)))
    return failure();

-  if (hasLeakingAllocs) {
-    // Promote returned buffers to "out" parameters.
-    // TODO: Pass options to support custom dealloc ops.
-    if (options.promoteBufferResultsToOutParams && isa<ModuleOp>(op) &&
-        failed(promoteBufferResultsToOutParams(cast<ModuleOp>(op))))
-      return failure();
-
-    // Create deallocation ops for all "leaking buffers" and all buffer
-    // allocations that were added during the above promotion process.
-    // TODO: Pass options to support custom dealloc ops.
-    if (options.createDeallocs && failed(deallocateBuffers(op)))
-      return failure();
-  }
+  // Promote returned buffers to "out" parameters.
+  // TODO: Pass options to support custom dealloc ops.
+  if (options.promoteBufferResultsToOutParams && isa<ModuleOp>(op) &&
+      failed(promoteBufferResultsToOutParams(cast<ModuleOp>(op))))
+    return failure();
+
+  // Create deallocation ops for all "leaking buffers" and all buffer
+  // allocations that were added during the above promotion process.
+  // TODO: Pass options to support custom dealloc ops.
+  if (hasLeakingAllocs && options.createDeallocs &&
+      failed(deallocateBuffers(op)))
+    return failure();

  // Deallocate all remaining buffers at the end of their parent blocks.
  if (failed(createAllocDeallocOps(op, options)))
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
index 517f71b0aef41..236b961f78a6b 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
@@ -1,11 +1,19 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs promote-buffer-results-to-out-params" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs promote-buffer-results-to-out-params function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs promote-buffer-results-to-out-params function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=infer-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE
+
+// Note: function-boundary-type-conversion=infer-layout-map with
+// promote-buffer-results-to-out-params is an unsupported combination.
// Note: This bufferization is not very efficient yet, but it works.
// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
// CHECK-LABEL: func @callee(
// CHECK-SAME: %[[arg0:.*]]: memref<5xf32, #[[$map1]]>,
-// CHECK-SAME: %[[arg1:.*]]: memref<5xf32>) {
+// CHECK-SAME: %[[arg1:.*]]: memref<5xf32, #[[$map1]]>) {
+// This alloc is not needed, but it is inserted due to the out-of-place
+// bufferization of the tensor.insert. With a better layering of the out param
+// promotion pass, this alloc could be avoided.
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
// CHECK: memref.copy %[[arg0]], %[[alloc]]
// CHECK: memref.store %{{.*}}, %[[alloc]]
@@ -13,21 +21,46 @@
// CHECK: memref.dealloc %[[alloc]]
// CHECK: return
// CHECK: }
+
+// CHECK-NO-LAYOUT-LABEL: func @callee(
+// CHECK-NO-LAYOUT-SAME: %[[arg0:.*]]: memref<5xf32>,
+// CHECK-NO-LAYOUT-SAME: %[[arg1:.*]]: memref<5xf32>) {
+// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
+// CHECK-NO-LAYOUT: memref.copy %[[arg0]], %[[alloc]]
+// CHECK-NO-LAYOUT: memref.store {{.*}}, %[[alloc]]
+// CHECK-NO-LAYOUT: memref.copy %[[alloc]], %[[arg1]]
+// CHECK-NO-LAYOUT: memref.dealloc %[[alloc]]
+
+// CHECK-BASELINE: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+// CHECK-BASELINE-LABEL: func @callee(
+// CHECK-BASELINE-SAME: %[[arg0:.*]]: memref<5xf32, #[[$map1]]>) -> memref<5xf32> {
+// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
+// CHECK-BASELINE: memref.copy %[[arg0]], %[[alloc]]
+// CHECK-BASELINE: memref.store {{.*}}, %[[alloc]]
+// CHECK-BASELINE: return %[[alloc]]
func.func @callee(%t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
  %c0 = arith.constant 0 : index
  %cst = arith.constant 8.0 : f32
+  // This must bufferize out-of-place.
  %1 = tensor.insert %cst into %t[%c0] : tensor<5xf32>
+  // Instead of returning %1, copy it into the new out param. %t will
+  // disappear entirely because its buffer is equivalent to a bbArg.
  return %t, %1 : tensor<5xf32>, tensor<5xf32>
}
// CHECK: func @main(%[[arg0:.*]]: memref<5xf32, #[[$map1]]>) -> (f32, f32) {
// CHECK: %[[alloc:.*]] = memref.alloc() : memref<5xf32>
-// CHECK: call @callee(%[[arg0]], %[[alloc]])
+// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<5xf32> to memref<5xf32, #[[$map1]]>
+// CHECK: call @callee(%[[arg0]], %[[casted]])
// CHECK: %[[l1:.*]] = memref.load %[[arg0]]
// CHECK: %[[l2:.*]] = memref.load %[[alloc]]
// CHECK: memref.dealloc %[[alloc]]
// CHECK: return %[[l1]], %[[l2]]
// CHECK: }
+
+// CHECK-NO-LAYOUT-LABEL: func @main(%{{.*}}: memref<5xf32>) -> (f32, f32) {
+// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<5xf32>
+// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]])
func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
  %c0 = arith.constant 0 : index
  %0, %1 = func.call @callee(%t)
@@ -37,3 +70,63 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
  return %2, %3 : f32, f32
}
+// -----
+
+// CHECK: #[[$map2a:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
+// CHECK: #[[$map2b:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)>
+// CHECK-LABEL: func @callee(
+// CHECK-SAME: %{{.*}}: index,
+// CHECK-SAME: %[[r:.*]]: memref<2x5xf32, #[[$map2a]]>) {
+// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
+// CHECK: %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, #[[$map2b]]>
+// CHECK: memref.copy %[[subview]], %[[r]]
+// CHECK: memref.dealloc %[[alloc]]
+
+// CHECK-NO-LAYOUT-LABEL: func @callee(
+// CHECK-NO-LAYOUT-SAME: %{{.*}}: index,
+// CHECK-NO-LAYOUT-SAME: %[[r:.*]]: memref<2x5xf32>) {
+// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
+// CHECK-NO-LAYOUT: %[[subview:.*]] = memref.subview %[[alloc]]
+// Note: This alloc is not needed, but it is inserted before the returned buffer
+// is promoted to an out param to reconcile mismatching layout maps on return
+// value and function signature.
+// CHECK-NO-LAYOUT: %[[alloc2:.*]] = memref.alloc() : memref<2x5xf32>
+// CHECK-NO-LAYOUT: memref.copy %[[subview]], %[[alloc2]]
+// CHECK-NO-LAYOUT: memref.dealloc %[[alloc]]
+// CHECK-NO-LAYOUT: memref.copy %[[alloc2]], %[[r]]
+// CHECK-NO-LAYOUT: memref.dealloc %[[alloc2]]
+
+// CHECK-BASELINE: #[[$map2:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 20 + s0 + d1)>
+// CHECK-BASELINE-LABEL: func @callee(
+// CHECK-BASELINE-SAME: %{{.*}}: index) -> memref<2x5xf32, #[[$map2]]> {
+// CHECK-BASELINE: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10x20xf32>
+// CHECK-BASELINE: %[[subview:.*]] = memref.subview %[[alloc]]
+// CHECK-BASELINE: return %[[subview]]
+func.func @callee(%idx: index) -> tensor<2x5xf32> {
+  %0 = bufferization.alloc_tensor() : tensor<10x20xf32>
+  %1 = tensor.extract_slice %0[%idx, %idx][2, 5][1, 1] : tensor<10x20xf32> to tensor<2x5xf32>
+  return %1 : tensor<2x5xf32>
+}
+
+// CHECK: func @main(
+// CHECK: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
+// CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<2x5xf32> to memref<2x5xf32, #[[$map2a]]>
+// CHECK: call @callee(%{{.*}}, %[[casted]])
+// CHECK: memref.load %[[alloc]]
+// CHECK: memref.dealloc %[[alloc]]
+
+// CHECK-NO-LAYOUT: func @main(
+// CHECK-NO-LAYOUT: %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
+// CHECK-NO-LAYOUT: call @callee(%{{.*}}, %[[alloc]])
+// CHECK-NO-LAYOUT: memref.load %[[alloc]]
+// CHECK-NO-LAYOUT: memref.dealloc
+
+// CHECK-BASELINE: func @main(
+// CHECK-BASELINE: %[[call:.*]] = call @callee
+// CHECK-BASELINE: memref.load %[[call]]
+func.func @main(%idx: index) -> f32 {
+  %c0 = arith.constant 0 : index
+  %0 = func.call @callee(%idx) : (index) -> (tensor<2x5xf32>)
+  %1 = tensor.extract %0[%c0, %c0] : tensor<2x5xf32>
+  return %1 : f32
+}
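As a hypothetical example (not part of this commit's test cases), a result
type whose layout is static but not the identity is expected to be rejected,
because such a buffer can neither be allocated directly nor reached by casting
an identity-layout allocation:

// Static stride of 2: neither an identity nor a fully dynamic layout.
#strided = affine_map<(d0) -> (d0 * 2)>
// Expected diagnostic: "cannot create out param for result with unsupported layout"
func.func private @f() -> memref<5xf32, #strided>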