[Mlir-commits] [mlir] 8aeca73 - [MLIR] Added support for dynamic shaped allocas to promote-buffers-to-stack pass.
Julian Gross
llvmlistbot at llvm.org
Thu Dec 3 02:48:52 PST 2020
Author: Julian Gross
Date: 2020-12-03T11:47:49+01:00
New Revision: 8aeca73702d84590e32e404a2d3038399cf71418
URL: https://github.com/llvm/llvm-project/commit/8aeca73702d84590e32e404a2d3038399cf71418
DIFF: https://github.com/llvm/llvm-project/commit/8aeca73702d84590e32e404a2d3038399cf71418.diff
LOG: [MLIR] Added support for dynamic shaped allocas to promote-buffers-to-stack pass.
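Extended the promote-buffers-to-stack pass to support dynamically shaped allocas.
A dynamically shaped alloc is only converted if its dynamic dimensions are produced by rank operations and the rank of the allocated memref does not exceed a maximum rank.
A new pass option (max-rank-of-allocated-memref, default 1) is added to adjust this maximum rank.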
Differential Revision: https://reviews.llvm.org/D91969
Added:
Modified:
mlir/include/mlir/Transforms/Passes.h
mlir/include/mlir/Transforms/Passes.td
mlir/lib/Transforms/BufferOptimizations.cpp
mlir/test/Transforms/promote-buffers-to-stack.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 77d98ce79cca..c092d0120b60 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -42,9 +42,11 @@ std::unique_ptr<Pass> createBufferLoopHoistingPass();
/// Creates a pass that promotes heap-based allocations to stack-based ones.
/// Only buffers smaller than the provided size are promoted.
+/// Dynamic shaped buffers are promoted up to the given rank.
std::unique_ptr<Pass>
createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
- unsigned bitwidthOfIndexType = 64);
+ unsigned bitwidthOfIndexType = 64,
+ unsigned maxRankOfAllocatedMemRef = 1);
/// Creates a pass that finalizes a partial bufferization by removing remaining
/// tensor_load and tensor_to_memref operations.
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 29fe43fc0169..afad7cd5852f 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -207,17 +207,21 @@ def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
let description = [{
This pass implements a simple algorithm to convert heap-based memory
allocations to stack-based ones. It uses a built-in heuristic to decide
- whether it makes sense to convert an allocation.
+ whether it makes sense to convert an allocation. Furthermore, dynamic
+ shaped buffers that are limited by the rank of the tensor can be
+ converted. They are only transformed if they are considered to be small.
}];
let constructor = "mlir::createPromoteBuffersToStackPass()";
let options = [
Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
/*default=*/"1024",
- "Define the maximum size in bytes to promote allocations to stack.">,
+ "Maximal size in bytes to promote allocations to stack.">,
Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
/*default=*/"64",
- "Define the bitwidth of the index type. Used for size estimation.">,
-
+ "Bitwidth of the index type. Used for size estimation.">,
+ Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
+ /*default=*/"1",
+ "Maximal memref rank to promote dynamic buffers.">,
];
}
diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Transforms/BufferOptimizations.cpp
index 0693c39d3f6b..651948a9ae22 100644
--- a/mlir/lib/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Transforms/BufferOptimizations.cpp
@@ -30,10 +30,23 @@ static bool isKnownControlFlowInterface(Operation *op) {
/// transformation is only applied to small buffers since large buffers could
/// exceed the stack space.
static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
- unsigned bitwidthOfIndexType) {
+ unsigned bitwidthOfIndexType,
+ unsigned maxRankOfAllocatedMemRef) {
auto type = alloc.getType().dyn_cast<ShapedType>();
- if (!type || !type.hasStaticShape())
+ if (!type || !alloc.getDefiningOp<AllocOp>())
return false;
+ if (!type.hasStaticShape()) {
+ // Check if the dynamic shape dimension of the alloc is produced by RankOp.
+ // If this is the case, it is likely to be small. Furthermore, the dimension
+ // is limited to the maximum rank of the allocated memref to avoid large
+ // values by multiplying several small values.
+ if (type.getRank() <= maxRankOfAllocatedMemRef) {
+ return llvm::all_of(
+ alloc.getDefiningOp()->getOperands(),
+ [&](Value operand) { return operand.getDefiningOp<RankOp>(); });
+ }
+ return false;
+ }
// For index types, use the provided size, as the type does not know.
unsigned int bitwidth = type.getElementType().isIndex()
? bitwidthOfIndexType
@@ -286,7 +299,8 @@ class BufferPlacementPromotion : BufferPlacementTransformationBase {
: BufferPlacementTransformationBase(op) {}
/// Promote buffers to stack-based allocations.
- void promote(unsigned maximumSize, unsigned bitwidthOfIndexType) {
+ void promote(unsigned maximumSize, unsigned bitwidthOfIndexType,
+ unsigned maxRankOfAllocatedMemRef) {
for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
Value alloc = std::get<0>(entry);
Operation *dealloc = std::get<1>(entry);
@@ -294,8 +308,9 @@ class BufferPlacementPromotion : BufferPlacementTransformationBase {
// The transformation is done if the allocation is limited to a given
// size. Furthermore, a deallocation must not be defined for this
// allocation entry and a parent allocation scope must exist.
- if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType) || dealloc ||
- !hasAllocationScope(alloc, aliases))
+ if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType,
+ maxRankOfAllocatedMemRef) ||
+ dealloc || !hasAllocationScope(alloc, aliases))
continue;
Operation *startOperation = BufferPlacementAllocs::getStartOperation(
@@ -303,12 +318,13 @@ class BufferPlacementPromotion : BufferPlacementTransformationBase {
// Build a new alloca that is associated with its parent
// `AutomaticAllocationScope` determined during the initialization phase.
OpBuilder builder(startOperation);
- auto alloca = builder.create<AllocaOp>(
- alloc.getLoc(), alloc.getType().cast<MemRefType>());
+ Operation *allocOp = alloc.getDefiningOp();
+ Operation *alloca = builder.create<AllocaOp>(
+ alloc.getLoc(), alloc.getType().cast<MemRefType>(),
+ allocOp->getOperands());
// Replace the original alloc by a newly created alloca.
- Operation *allocOp = alloc.getDefiningOp();
- allocOp->replaceAllUsesWith(alloca.getOperation());
+ allocOp->replaceAllUsesWith(alloca);
allocOp->erase();
}
}
@@ -347,15 +363,18 @@ struct PromoteBuffersToStackPass
: PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
- unsigned bitwidthOfIndexType) {
+ unsigned bitwidthOfIndexType,
+ unsigned maxRankOfAllocatedMemRef) {
this->maxAllocSizeInBytes = maxAllocSizeInBytes;
this->bitwidthOfIndexType = bitwidthOfIndexType;
+ this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
}
void runOnFunction() override {
// Move all allocation nodes and convert candidates into allocas.
BufferPlacementPromotion optimizer(getFunction());
- optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType);
+ optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType,
+ this->maxRankOfAllocatedMemRef);
}
};
@@ -371,7 +390,8 @@ std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
std::unique_ptr<Pass>
mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
- unsigned bitwidthOfIndexType) {
- return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
- bitwidthOfIndexType);
+ unsigned bitwidthOfIndexType,
+ unsigned maxRankOfAllocatedMemRef) {
+ return std::make_unique<PromoteBuffersToStackPass>(
+ maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
}
diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir
index 903b1f8cb748..8fabd4fe6a01 100644
--- a/mlir/test/Transforms/promote-buffers-to-stack.mlir
+++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir
@@ -1,6 +1,7 @@
// RUN: mlir-opt -promote-buffers-to-stack -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix DEFINDEX
// RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=128" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix BIGINDEX
// RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=64" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix LOWLIMIT
+// RUN: mlir-opt -promote-buffers-to-stack="max-rank-of-allocated-memref=2" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix RANK
// This file checks the behavior of PromoteBuffersToStack pass for converting
// AllocOps into AllocaOps, if possible.
@@ -14,8 +15,6 @@
// PromoteBuffersToStack expected behavior: It should convert %0 into an
// AllocaOp.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @condBranch
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
cond_br %arg0, ^bb1, ^bb2
@@ -47,8 +46,6 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
// PromoteBuffersToStack expected behavior:
// Since the alloc has dynamic type, it is not converted into an alloca.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @condBranchDynamicType
func @condBranchDynamicType(
%arg0: i1,
@@ -79,6 +76,41 @@ func @condBranchDynamicType(
// -----
+// CHECK-LABEL: func @dynamicRanked
+func @dynamicRanked(%tensor: tensor<*xf32>) {
+ %0 = rank %tensor : tensor<*xf32>
+ %1 = alloc(%0) : memref<?xindex>
+ return
+}
+
+// CHECK-NEXT: %[[RANK:.*]] = rank
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca(%[[RANK]])
+
+// -----
+
+// CHECK-LABEL: func @dynamicRanked2D
+func @dynamicRanked2D(%tensor: tensor<*xf32>) {
+ %0 = rank %tensor : tensor<*xf32>
+ %1 = alloc(%0, %0) : memref<?x?xindex>
+ return
+}
+
+// CHECK-NEXT: %[[RANK:.*]] = rank
+// RANK-NEXT: %[[ALLOC:.*]] = alloca(%[[RANK]], %[[RANK]])
+// DEFINDEX-NEXT: %[[ALLOC:.*]] = alloc(%[[RANK]], %[[RANK]])
+
+// -----
+
+// CHECK-LABEL: func @dynamicNoRank
+func @dynamicNoRank(%arg0: index) {
+ %0 = alloc(%arg0) : memref<?xindex>
+ return
+}
+
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc
+
+// -----
+
// Test Case: Existing AllocOp with no users.
// PromoteBuffersToStack expected behavior: It should convert it to an
// AllocaOp.
@@ -102,8 +134,6 @@ func @emptyUsesValue(%arg0: memref<4xf32>) {
// PromoteBuffersToStack expected behavior: It should convert it into an
// AllocaOp.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @criticalEdge
func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
@@ -132,8 +162,6 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
// bb2
// PromoteBuffersToStack expected behavior: It converts the alloc in an alloca.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @invCriticalEdge
func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = alloc() : memref<2xf32>
@@ -161,8 +189,6 @@ func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
// bb3 <- Initial position of the second AllocOp
// PromoteBuffersToStack expected behavior: It converts the allocs into allocas.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @ifElse
func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = alloc() : memref<2xf32>
@@ -198,8 +224,6 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
// bb3
// PromoteBuffersToStack expected behavior: It converts the alloc into alloca.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @ifElseNoUsers
func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = alloc() : memref<2xf32>
@@ -233,8 +257,6 @@ func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
// PromoteBuffersToStack expected behavior: The two allocs should be converted
// into allocas.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @ifElseNested
func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
%0 = alloc() : memref<2xf32>
@@ -270,8 +292,6 @@ func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
// PromoteBuffersToStack expected behavior: It converts the two AllocOps into
// allocas.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @redundantOperations
func @redundantOperations(%arg0: memref<2xf32>) {
%0 = alloc() : memref<2xf32>
@@ -299,8 +319,6 @@ func @redundantOperations(%arg0: memref<2xf32>) {
// PromoteBuffersToStack expected behavior: Both AllocOps are converted into
// allocas.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
func @moving_alloc_and_inserting_missing_dealloc(
%cond: i1,
@@ -335,8 +353,6 @@ func @moving_alloc_and_inserting_missing_dealloc(
// PromoteBuffersToStack expected behavior: The AllocOps are converted into
// allocas.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @nested_regions_and_cond_branch
func @nested_regions_and_cond_branch(
%arg0: i1,
@@ -373,8 +389,6 @@ func @nested_regions_and_cond_branch(
// there is no conversion allowed. The second alloc is converted, since it
// only remains in the scope of the function.
-#map0 = affine_map<(d0) -> (d0)>
-
// CHECK-LABEL: func @memref_in_function_results
func @memref_in_function_results(
%arg0: memref<5xf32>,
@@ -583,4 +597,5 @@ func @indexElementType() {
// DEFINDEX-NEXT: alloca()
// BIGINDEX-NEXT: alloca()
// LOWLIMIT-NEXT: alloc()
+// RANK-NEXT: alloca()
// CHECK-NEXT: return
More information about the Mlir-commits mailing list