[Mlir-commits] [mlir] 0d1d363 - [MLIR] Added PromoteBuffersToStackPass to convert heap- to stack-based allocations.
Julian Gross
llvmlistbot at llvm.org
Fri Oct 23 03:11:49 PDT 2020
Author: Julian Gross
Date: 2020-10-23T12:02:25+02:00
New Revision: 0d1d363c51c93614783755426cb58c819c164fab
URL: https://github.com/llvm/llvm-project/commit/0d1d363c51c93614783755426cb58c819c164fab
DIFF: https://github.com/llvm/llvm-project/commit/0d1d363c51c93614783755426cb58c819c164fab.diff
LOG: [MLIR] Added PromoteBuffersToStackPass to convert heap- to stack-based allocations.
Added an optimization pass that converts heap-based allocs to stack-based
allocas during buffer placement, together with the corresponding test file.
Differential Revision: https://reviews.llvm.org/D89688
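In essence, the pass rewrites sufficiently small, statically shaped, non-escaping
heap allocations into stack allocations. A minimal sketch of the intended rewrite
(hypothetical input, not one of the committed test cases; it assumes the default
limit of 1024 bytes and the std-dialect alloc/alloca ops used in this revision):

  // Before running -promote-buffers-to-stack:
  func @small_buffer(%arg0: memref<2xf32>) {
    %0 = alloc() : memref<2xf32>
    "linalg.copy"(%arg0, %0) : (memref<2xf32>, memref<2xf32>) -> ()
    return
  }

  // After the pass, the heap allocation becomes a stack allocation:
  //   %0 = alloca() : memref<2xf32>

The max-alloc-size-in-bytes pass option declared below controls the size threshold.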
Added:
mlir/test/Transforms/promote-buffers-to-stack.mlir
Modified:
mlir/include/mlir/Transforms/Passes.h
mlir/include/mlir/Transforms/Passes.td
mlir/lib/Transforms/BufferOptimizations.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 54d348fd3a63..b02915e5ac75 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -40,6 +40,10 @@ std::unique_ptr<Pass> createBufferHoistingPass();
/// reallocations inside of loops.
std::unique_ptr<Pass> createBufferLoopHoistingPass();
+/// Creates a pass that promotes heap-based allocations to stack-based ones.
+std::unique_ptr<Pass>
+createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024);
+
/// Creates an instance of the Canonicalizer pass.
std::unique_ptr<Pass> createCanonicalizerPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 8174ecd2021a..f63290fb264a 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -201,6 +201,22 @@ def BufferLoopHoisting : FunctionPass<"buffer-loop-hoisting"> {
let constructor = "mlir::createBufferLoopHoistingPass()";
}
+def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
+ let summary = "Promotes heap-based allocations to automatically managed "
+ "stack-based allocations";
+ let description = [{
+ This pass implements a simple algorithm to convert heap-based memory
+ allocations to stack-based ones. It uses a built-in heuristic to decide
+ whether it makes sense to convert an allocation.
+ }];
+ let constructor = "mlir::createPromoteBuffersToStackPass()";
+ let options = [
+ Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
+ /*default=*/"1024",
+ "Define the maximum size in bytes to promote allocations to stack.">,
+ ];
+}
+
def Canonicalizer : Pass<"canonicalize"> {
let summary = "Canonicalize operations";
let description = [{
diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Transforms/BufferOptimizations.cpp
index 2686d35ee323..cd69a7cc9f13 100644
--- a/mlir/lib/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Transforms/BufferOptimizations.cpp
@@ -6,9 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements logic for two optimization passes. These passes try to
-// hoist alloc nodes to reduce the number of allocations and copies during
-// buffer deallocation.
+// This file implements logic for three optimization passes. The first two
+// passes try to move alloc nodes out of blocks to reduce the number of
+// allocations and copies during buffer deallocation. The third pass tries to
+// convert heap-based allocations to stack-based allocations, if possible.
#include "PassDetail.h"
#include "mlir/IR/Operation.h"
@@ -19,6 +20,63 @@
using namespace mlir;
+/// Returns true if the given operation implements a known high-level region-
+/// based control-flow interface.
+static bool isKnownControlFlowInterface(Operation *op) {
+ return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
+}
+
+/// Check if the size of the allocation is less than the given size. The
+/// transformation is only applied to small buffers since large buffers could
+/// exceed the stack space.
+static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes) {
+ auto type = alloc.getType().dyn_cast<ShapedType>();
+ if (!type || !type.hasStaticShape())
+ return false;
+ return type.getSizeInBits() < maximumSizeInBytes * 8;
+}
+
+/// Checks whether the given aliases leave the allocation scope.
+static bool
+leavesAllocationScope(Region *parentRegion,
+ const BufferPlacementAliasAnalysis::ValueSetT &aliases) {
+ for (Value alias : aliases) {
+ for (auto *use : alias.getUsers()) {
+ // If there is at least one alias that leaves the parent region, we know
+ // that this alias escapes the whole region and hence the associated
+ // allocation leaves the allocation scope.
+ if (use->hasTrait<OpTrait::ReturnLike>() &&
+ use->getParentRegion() == parentRegion)
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Checks if an automatic allocation scope exists for the given alloc value.
+static bool
+hasAllocationScope(Value alloc,
+ const BufferPlacementAliasAnalysis &aliasAnalysis) {
+ Region *region = alloc.getParentRegion();
+ do {
+ if (Operation *parentOp = region->getParentOp()) {
+ // Check if the operation is an automatic allocation scope and whether an
+ // alias leaves the scope. If an alias escapes the scope, the allocation
+ // cannot be converted into a stack-based allocation.
+ if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() &&
+ !leavesAllocationScope(region, aliasAnalysis.resolve(alloc)))
+ return true;
+ // Check if the operation is a known control flow interface and break the
+ // loop to avoid transformations inside loops. Furthermore, skip the
+ // transformation if the operation does not implement a RegionBranchOpInterface.
+ if (BufferPlacementTransformationBase::isLoop(parentOp) ||
+ !isKnownControlFlowInterface(parentOp))
+ break;
+ }
+ } while ((region = region->getParentRegion()));
+ return false;
+}
+
namespace {
//===----------------------------------------------------------------------===//
@@ -46,13 +104,6 @@ struct BufferAllocationHoistingStateBase {
/// Implements the actual hoisting logic for allocation nodes.
template <typename StateT>
class BufferAllocationHoisting : public BufferPlacementTransformationBase {
-private:
- /// Returns true if the given operation implements a known high-level region-
- /// based control-flow interface.
- static bool isKnownControlFlowInterface(Operation *op) {
- return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op);
- }
-
public:
BufferAllocationHoisting(Operation *op)
: BufferPlacementTransformationBase(op), dominators(op),
@@ -220,6 +271,44 @@ struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase {
void recordMoveToParent(Block *block) { placementBlock = block; }
};
+//===----------------------------------------------------------------------===//
+// BufferPlacementPromotion
+//===----------------------------------------------------------------------===//
+
+/// Promotes heap-based allocations to stack-based allocations (if possible).
+class BufferPlacementPromotion : BufferPlacementTransformationBase {
+public:
+ BufferPlacementPromotion(Operation *op)
+ : BufferPlacementTransformationBase(op) {}
+
+ /// Promote buffers to stack-based allocations.
+ void promote(unsigned maximumSize) {
+ for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
+ Value alloc = std::get<0>(entry);
+ // Check several requirements to transform an AllocOp into an AllocaOp.
+ // The transformation is only applied if the allocation fits within the
+ // given size limit, no deallocation is associated with this allocation
+ // entry, and a parent automatic allocation scope exists.
+ if (!isSmallAlloc(alloc, maximumSize) || std::get<1>(entry) ||
+ !hasAllocationScope(alloc, aliases))
+ continue;
+
+ Operation *startOperation = BufferPlacementAllocs::getStartOperation(
+ alloc, alloc.getParentBlock(), liveness);
+ // Build a new alloca at the position of the original allocation; the
+ // surrounding `AutomaticAllocationScope` has already been verified above.
+ OpBuilder builder(startOperation);
+ auto alloca = builder.create<AllocaOp>(
+ alloc.getLoc(), alloc.getType().cast<MemRefType>());
+
+ // Replace the original alloc by a newly created alloca.
+ Operation *allocOp = alloc.getDefiningOp();
+ allocOp->replaceAllUsesWith(alloca.getOperation());
+ allocOp->erase();
+ }
+ }
+};
+
//===----------------------------------------------------------------------===//
// BufferOptimizationPasses
//===----------------------------------------------------------------------===//
@@ -247,6 +336,24 @@ struct BufferLoopHoistingPass : BufferLoopHoistingBase<BufferLoopHoistingPass> {
}
};
+/// The promote-buffers-to-stack pass that tries to convert AllocOps into
+/// AllocaOps.
+struct PromoteBuffersToStackPass
+ : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
+
+ PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes)
+ : maximumSize(maxAllocSizeInBytes) {}
+
+ void runOnFunction() override {
+ // Convert all eligible allocation nodes into stack-based allocas.
+ BufferPlacementPromotion optimizer(getFunction());
+ optimizer.promote(maximumSize);
+ }
+
+private:
+ const unsigned maximumSize;
+};
+
} // end anonymous namespace
std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
@@ -256,3 +363,8 @@ std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
return std::make_unique<BufferLoopHoistingPass>();
}
+
+std::unique_ptr<Pass>
+mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes) {
+ return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes);
+}
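As a note on the heuristic: isSmallAlloc above compares the static size in bits
against maxAllocSizeInBytes * 8 using a strict less-than. With the default limit
of 1024 bytes the boundary therefore falls as sketched below (hypothetical
allocations, assuming they are otherwise eligible, i.e. non-escaping, inside an
AutomaticAllocationScope, and without an associated dealloc):

  %a = alloc() : memref<255xf32>  // 255 * 4 = 1020 bytes < 1024 -> promoted to alloca
  %b = alloc() : memref<256xf32>  // 256 * 4 = 1024 bytes, not strictly smaller -> kept
  %c = alloc(%n) : memref<?xf32>  // dynamic shape, no static size -> kept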
diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir
new file mode 100644
index 000000000000..2f195943b35a
--- /dev/null
+++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir
@@ -0,0 +1,664 @@
+// RUN: mlir-opt -promote-buffers-to-stack -split-input-file %s | FileCheck %s
+
+// This file checks the behavior of PromoteBuffersToStack pass for converting
+// AllocOps into AllocaOps, if possible.
+
+// Test Case:
+// bb0
+// / \
+// bb1 bb2 <- Initial position of AllocOp
+// \ /
+// bb3
+// PromoteBuffersToStack expected behavior: It should convert %0 into an
+// AllocaOp.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @condBranch
+func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+ cond_br %arg0, ^bb1, ^bb2
+^bb1:
+ br ^bb3(%arg1 : memref<2xf32>)
+^bb2:
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ br ^bb3(%0 : memref<2xf32>)
+^bb3(%1: memref<2xf32>):
+ "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: cond_br {{.*}}
+// CHECK: ^bb2
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK: linalg.copy
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case:
+// bb0
+// / \
+// bb1 bb2 <- Initial position of AllocOp
+// \ /
+// bb3
+// PromoteBuffersToStack expected behavior:
+// Since the alloc has dynamic type, it is not converted into an alloca.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @condBranchDynamicType
+func @condBranchDynamicType(
+ %arg0: i1,
+ %arg1: memref<?xf32>,
+ %arg2: memref<?xf32>,
+ %arg3: index) {
+ cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+^bb1:
+ br ^bb3(%arg1 : memref<?xf32>)
+^bb2(%0: index):
+ %1 = alloc(%0) : memref<?xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<?xf32>)
+ outs(%1: memref<?xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ br ^bb3(%1 : memref<?xf32>)
+^bb3(%2: memref<?xf32>):
+ "linalg.copy"(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: cond_br
+// CHECK: ^bb2
+// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
+// CHECK-NEXT: %[[ALLOC0:.*]] = alloc(%[[IDX]])
+// CHECK-NEXT: linalg.generic
+// CHECK: br ^bb3
+// CHECK-NEXT: ^bb3(%[[ALLOC0:.*]]:{{.*}})
+// CHECK: linalg.copy(%[[ALLOC0]],
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case: Existing AllocOp with no users.
+// PromoteBuffersToStack expected behavior: It should convert it to an
+// AllocaOp.
+
+// CHECK-LABEL: func @emptyUsesValue
+func @emptyUsesValue(%arg0: memref<4xf32>) {
+ %0 = alloc() : memref<4xf32>
+ return
+}
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case:
+// bb0
+// / \
+// | bb1 <- Initial position of AllocOp
+// \ /
+// bb2
+// PromoteBuffersToStack expected behavior: It should convert it into an
+// AllocaOp.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @criticalEdge
+func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+ cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+^bb1:
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ br ^bb2(%0 : memref<2xf32>)
+^bb2(%1: memref<2xf32>):
+ "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: cond_br {{.*}}
+// CHECK: ^bb1
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK: linalg.copy
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case:
+// bb0 <- Initial position of AllocOp
+// / \
+// | bb1
+// \ /
+// bb2
+// PromoteBuffersToStack expected behavior: It converts the alloc into an alloca.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @invCriticalEdge
+func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
+^bb1:
+ br ^bb2(%0 : memref<2xf32>)
+^bb2(%1: memref<2xf32>):
+ "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK: cond_br
+// CHECK: linalg.copy
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case:
+// bb0 <- Initial position of the first AllocOp
+// / \
+// bb1 bb2
+// \ /
+// bb3 <- Initial position of the second AllocOp
+// PromoteBuffersToStack expected behavior: It converts the allocs into allocas.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @ifElse
+func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ cond_br %arg0,
+ ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+ ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
+ br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
+ br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
+ %7 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%5: memref<2xf32>)
+ outs(%7: memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ "linalg.copy"(%7, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
+// CHECK-NEXT: linalg.generic
+// CHECK: %[[ALLOCA1:.*]] = alloca()
+// CHECK: linalg.generic
+// CHECK: linalg.copy(%[[ALLOCA1]]
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case: No users for buffer in if-else CFG
+// bb0 <- Initial position of AllocOp
+// / \
+// bb1 bb2
+// \ /
+// bb3
+// PromoteBuffersToStack expected behavior: It converts the alloc into an alloca.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @ifElseNoUsers
+func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ cond_br %arg0,
+ ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+ ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
+ br ^bb3(%1, %2 : memref<2xf32>, memref<2xf32>)
+^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
+ br ^bb3(%3, %4 : memref<2xf32>, memref<2xf32>)
+^bb3(%5: memref<2xf32>, %6: memref<2xf32>):
+ "linalg.copy"(%arg1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK: return
+
+// -----
+
+// Test Case:
+// bb0 <- Initial position of the first AllocOp
+// / \
+// bb1 bb2
+// | / \
+// | bb3 bb4
+// \ \ /
+// \ /
+// bb5 <- Initial position of the second AllocOp
+// PromoteBuffersToStack expected behavior: The two allocs should be converted
+// into allocas.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @ifElseNested
+func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ cond_br %arg0,
+ ^bb1(%arg1, %0 : memref<2xf32>, memref<2xf32>),
+ ^bb2(%0, %arg1 : memref<2xf32>, memref<2xf32>)
+^bb1(%1: memref<2xf32>, %2: memref<2xf32>):
+ br ^bb5(%1, %2 : memref<2xf32>, memref<2xf32>)
+^bb2(%3: memref<2xf32>, %4: memref<2xf32>):
+ cond_br %arg0, ^bb3(%3 : memref<2xf32>), ^bb4(%4 : memref<2xf32>)
+^bb3(%5: memref<2xf32>):
+ br ^bb5(%5, %3 : memref<2xf32>, memref<2xf32>)
+^bb4(%6: memref<2xf32>):
+ br ^bb5(%3, %6 : memref<2xf32>, memref<2xf32>)
+^bb5(%7: memref<2xf32>, %8: memref<2xf32>):
+ %9 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%7: memref<2xf32>)
+ outs(%9: memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ "linalg.copy"(%9, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
+// CHECK-NEXT: linalg.generic
+// CHECK: %[[ALLOCA1:.*]] = alloca()
+// CHECK: linalg.generic
+// CHECK: linalg.copy(%[[ALLOCA1]]
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case: Dead operations in a single block.
+// PromoteBuffersToStack expected behavior: It converts the two AllocOps into
+// allocas.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @redundantOperations
+func @redundantOperations(%arg0: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg0: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ %1 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%0: memref<2xf32>)
+ outs(%1: memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ return
+}
+
+// CHECK: (%[[ARG0:.*]]: {{.*}})
+// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
+// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ARG0]]{{.*}} outs(%[[ALLOCA0]]
+// CHECK: %[[ALLOCA1:.*]] = alloca()
+// CHECK-NEXT: linalg.generic {{{.*}}} ins(%[[ALLOCA0]]{{.*}} outs(%[[ALLOCA1]]
+// CHECK: return
+
+// -----
+
+// Test Case:
+// bb0
+// / \
+// Initial pos of the 1st AllocOp -> bb1 bb2 <- Initial pos of the 2nd AllocOp
+// \ /
+// bb3
+// PromoteBuffersToStack expected behavior: Both AllocOps are converted into
+// allocas.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
+func @moving_alloc_and_inserting_missing_dealloc(
+ %cond: i1,
+ %arg0: memref<2xf32>,
+ %arg1: memref<2xf32>) {
+ cond_br %cond, ^bb1, ^bb2
+^bb1:
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg0: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ br ^exit(%0 : memref<2xf32>)
+^bb2:
+ %1 = alloc() : memref<2xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg0: memref<2xf32>)
+ outs(%1: memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ br ^exit(%1 : memref<2xf32>)
+^exit(%arg2: memref<2xf32>):
+ "linalg.copy"(%arg2, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: cond_br {{.*}}
+// CHECK: ^bb1
+// CHECK-NEXT: %{{.*}} = alloca()
+// CHECK: ^bb2
+// CHECK-NEXT: %{{.*}} = alloca()
+// CHECK: linalg.copy
+// CHECK-NEXT: return
+
+// -----
+
+// Test Case: Nested regions - This test defines a GenericOp inside the region
+// of another GenericOp.
+// PromoteBuffersToStack expected behavior: The outer AllocOp is converted
+// into an alloca; the alloc nested inside the linalg.generic region is not
+// converted.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @nested_regions_and_cond_branch
+func @nested_regions_and_cond_branch(
+ %arg0: i1,
+ %arg1: memref<2xf32>,
+ %arg2: memref<2xf32>) {
+ cond_br %arg0, ^bb1, ^bb2
+^bb1:
+ br ^bb3(%arg1 : memref<2xf32>)
+^bb2:
+ %0 = alloc() : memref<2xf32>
+ linalg.generic {
+ indexing_maps = [#map0, #map0],
+ iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%0: memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %1 = alloc() : memref<2xf32>
+ linalg.generic {
+ indexing_maps = [#map0, #map0],
+ iterator_types = ["parallel"]}
+ ins(%arg1: memref<2xf32>)
+ outs(%1: memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ %tmp1 = exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ br ^bb3(%0 : memref<2xf32>)
+^bb3(%1: memref<2xf32>):
+ "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: cond_br {{.*}}
+// CHECK: ^bb2
+// CHECK-NEXT: %[[ALLOCA0:.*]] = alloca()
+// CHECK: ^bb0
+// CHECK-NEXT: %[[ALLOCA1:.*]] = alloc()
+
+// -----
+
+// Test Case: buffer allocation escaping via the function result.
+// PromoteBuffersToStack expected behavior: The first alloc (%x) is returned
+// and therefore must not be converted. The second alloc (%y) is converted,
+// since it remains local to the function.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @memref_in_function_results
+func @memref_in_function_results(
+ %arg0: memref<5xf32>,
+ %arg1: memref<10xf32>,
+ %arg2: memref<5xf32>) -> (memref<10xf32>, memref<15xf32>) {
+ %x = alloc() : memref<15xf32>
+ %y = alloc() : memref<5xf32>
+ linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]}
+ ins(%arg0: memref<5xf32>)
+ outs(%y: memref<5xf32>) {
+ ^bb0(%arg3: f32, %arg4: f32):
+ %2 = exp %arg3 : f32
+ linalg.yield %2 : f32
+ }
+ linalg.copy(%y, %arg2) : memref<5xf32>, memref<5xf32>
+ return %arg1, %x : memref<10xf32>, memref<15xf32>
+}
+// CHECK: (%[[ARG0:.*]]: memref<5xf32>, %[[ARG1:.*]]: memref<10xf32>,
+// CHECK-SAME: %[[RESULT:.*]]: memref<5xf32>)
+// CHECK: %[[ALLOC:.*]] = alloc()
+// CHECK: %[[ALLOCA:.*]] = alloca()
+// CHECK: linalg.copy
+// CHECK: return %[[ARG1]], %[[ALLOC]]
+
+// -----
+
+// Test Case: nested region control flow
+// The allocation in the nested if branch cannot be converted to an alloca
+// since it has a dynamic shape, so its size cannot be bounded statically.
+
+// CHECK-LABEL: func @nested_region_control_flow
+func @nested_region_control_flow(
+ %arg0 : index,
+ %arg1 : index) -> memref<?x?xf32> {
+ %0 = cmpi "eq", %arg0, %arg1 : index
+ %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
+ %2 = scf.if %0 -> (memref<?x?xf32>) {
+ scf.yield %1 : memref<?x?xf32>
+ } else {
+ %3 = alloc(%arg0, %arg1) : memref<?x?xf32>
+ scf.yield %1 : memref<?x?xf32>
+ }
+ return %2 : memref<?x?xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc(%arg0, %arg0)
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
+// CHECK: scf.yield %[[ALLOC0]]
+// CHECK: %[[ALLOC2:.*]] = alloc(%arg0, %arg1)
+// CHECK-NEXT: scf.yield %[[ALLOC0]]
+// CHECK: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: nested region control flow within a region interface.
+// The alloc %0 is not converted in this case since the allocation
+// ultimately escapes the function via the return.
+
+// CHECK-LABEL: func @inner_region_control_flow
+func @inner_region_control_flow(%arg0 : index) -> memref<2x2xf32> {
+ %0 = alloc() : memref<2x2xf32>
+ %1 = test.region_if %0 : memref<2x2xf32> -> (memref<2x2xf32>) then {
+ ^bb0(%arg1 : memref<2x2xf32>):
+ test.region_if_yield %arg1 : memref<2x2xf32>
+ } else {
+ ^bb0(%arg1 : memref<2x2xf32>):
+ test.region_if_yield %arg1 : memref<2x2xf32>
+ } join {
+ ^bb0(%arg1 : memref<2x2xf32>):
+ test.region_if_yield %arg1 : memref<2x2xf32>
+ }
+ return %1 : memref<2x2xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = test.region_if
+// CHECK-NEXT: ^bb0(%[[ALLOC2:.*]]:{{.*}}):
+// CHECK-NEXT: test.region_if_yield %[[ALLOC2]]
+// CHECK: ^bb0(%[[ALLOC3:.*]]:{{.*}}):
+// CHECK-NEXT: test.region_if_yield %[[ALLOC3]]
+// CHECK: ^bb0(%[[ALLOC4:.*]]:{{.*}}):
+// CHECK-NEXT: test.region_if_yield %[[ALLOC4]]
+// CHECK: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: structured control-flow loop using a nested alloc.
+// Alloc %0 will be converted to an alloca. %3 is not transformed.
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(
+ %lb: index,
+ %ub: index,
+ %step: index,
+ %buf: memref<2xf32>,
+ %res: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ %1 = scf.for %i = %lb to %ub step %step
+ iter_args(%iterBuf = %buf) -> memref<2xf32> {
+ %2 = cmpi "eq", %i, %ub : index
+ %3 = alloc() : memref<2xf32>
+ scf.yield %3 : memref<2xf32>
+ }
+ "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca()
+// CHECK-NEXT: scf.for
+// CHECK: %[[ALLOC:.*]] = alloc()
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation.
+// The loop yields buffers that have been defined outside of the loop and the
+// backedges only use the iteration arguments (or one of their aliases).
+// Therefore, we do not have to (and are not allowed to) free any buffers
+// that are passed via the backedges. The alloc is converted to an AllocaOp.
+
+// CHECK-LABEL: func @loop_nested_if_no_alloc
+func @loop_nested_if_no_alloc(
+ %lb: index,
+ %ub: index,
+ %step: index,
+ %buf: memref<2xf32>,
+ %res: memref<2xf32>) {
+ %0 = alloc() : memref<2xf32>
+ %1 = scf.for %i = %lb to %ub step %step
+ iter_args(%iterBuf = %buf) -> memref<2xf32> {
+ %2 = cmpi "eq", %i, %ub : index
+ %3 = scf.if %2 -> (memref<2xf32>) {
+ scf.yield %0 : memref<2xf32>
+ } else {
+ scf.yield %iterBuf : memref<2xf32>
+ }
+ scf.yield %3 : memref<2xf32>
+ }
+ "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+ return
+}
+
+// CHECK: %[[ALLOCA0:.*]] = alloca()
+// CHECK-NEXT: %[[ALLOCA1:.*]] = scf.for {{.*}} iter_args(%[[IALLOCA:.*]] =
+// CHECK: %[[ALLOCA2:.*]] = scf.if
+// CHECK: scf.yield %[[ALLOCA0]]
+// CHECK: scf.yield %[[IALLOCA]]
+// CHECK: scf.yield %[[ALLOCA2]]
+// CHECK: linalg.copy(%[[ALLOCA1]], %arg4)
+
+// -----
+
+// Test Case: structured control-flow loop with a nested if operation using
+// a deeply nested buffer allocation.
+// The allocs are not converted in this case.
+
+// CHECK-LABEL: func @loop_nested_if_alloc
+func @loop_nested_if_alloc(
+ %lb: index,
+ %ub: index,
+ %step: index,
+ %buf: memref<2xf32>) -> memref<2xf32> {
+ %0 = alloc() : memref<2xf32>
+ %1 = scf.for %i = %lb to %ub step %step
+ iter_args(%iterBuf = %buf) -> memref<2xf32> {
+ %2 = cmpi "eq", %i, %ub : index
+ %3 = scf.if %2 -> (memref<2xf32>) {
+ %4 = alloc() : memref<2xf32>
+ scf.yield %4 : memref<2xf32>
+ } else {
+ scf.yield %0 : memref<2xf32>
+ }
+ scf.yield %3 : memref<2xf32>
+ }
+ return %1 : memref<2xf32>
+}
+
+// CHECK: %[[ALLOC0:.*]] = alloc()
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}}
+// CHECK: %[[ALLOC2:.*]] = scf.if
+// CHECK: %[[ALLOC3:.*]] = alloc()
+// CHECK-NEXT: scf.yield %[[ALLOC3]]
+// CHECK: scf.yield %[[ALLOC0]]
+// CHECK: scf.yield %[[ALLOC2]]
+// CHECK: return %[[ALLOC1]]
+
+// -----
+
+// Test Case: The allocated buffer is too large and is therefore not
+// converted. The default size limit is 1024 bytes; this buffer requires
+// 2048 x 4 = 8192 bytes.
+
+// CHECK-LABEL: func @large_buffer_allocation
+func @large_buffer_allocation(%arg0: memref<2048xf32>) {
+ %0 = alloc() : memref<2048xf32>
+ "linalg.copy"(%0, %arg0) : (memref<2048xf32>, memref<2048xf32>) -> ()
+ return
+}
+
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc()
+// CHECK-NEXT: linalg.copy