[Mlir-commits] [mlir] 8aeca73 - [MLIR] Added support for dynamically shaped allocas to the promote-buffers-to-stack pass.

Julian Gross llvmlistbot at llvm.org
Thu Dec 3 02:48:52 PST 2020


Author: Julian Gross
Date: 2020-12-03T11:47:49+01:00
New Revision: 8aeca73702d84590e32e404a2d3038399cf71418

URL: https://github.com/llvm/llvm-project/commit/8aeca73702d84590e32e404a2d3038399cf71418
DIFF: https://github.com/llvm/llvm-project/commit/8aeca73702d84590e32e404a2d3038399cf71418.diff

LOG: [MLIR] Added support for dynamically shaped allocas to the promote-buffers-to-stack pass.

Extended the promote-buffers-to-stack pass to support dynamically shaped
allocas. The conversion is limited by the rank of the underlying tensor;
a pass option is added to adjust this rank limit.

Differential Revision: https://reviews.llvm.org/D91969
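
For example, with the default max-rank-of-allocated-memref=1, an alloc such as
%1 = alloc(%0) : memref<?xindex> is promoted when %0 is produced by a rank op,
while the two-dimensional alloc(%0, %0) : memref<?x?xindex> is promoted only
once the limit is raised to 2 (see the new test cases below).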

Added: 
    

Modified: 
    mlir/include/mlir/Transforms/Passes.h
    mlir/include/mlir/Transforms/Passes.td
    mlir/lib/Transforms/BufferOptimizations.cpp
    mlir/test/Transforms/promote-buffers-to-stack.mlir

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 77d98ce79cca..c092d0120b60 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -42,9 +42,11 @@ std::unique_ptr<Pass> createBufferLoopHoistingPass();
 
 /// Creates a pass that promotes heap-based allocations to stack-based ones.
 /// Only buffers smaller than the provided size are promoted.
+/// Dynamically shaped buffers are promoted up to the given rank.
 std::unique_ptr<Pass>
 createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
-                                unsigned bitwidthOfIndexType = 64);
+                                unsigned bitwidthOfIndexType = 64,
+                                unsigned maxRankOfAllocatedMemRef = 1);
 
 /// Creates a pass that finalizes a partial bufferization by removing remaining
 /// tensor_load and tensor_to_memref operations.

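A minimal sketch of driving the extended entry point from C++ follows; the
pipeline setup, header paths, and the nesting on FuncOp are assumptions for
illustration, not part of this commit:

  #include "mlir/IR/Function.h"
  #include "mlir/Pass/PassManager.h"
  #include "mlir/Transforms/Passes.h"

  // Sketch: configure the promotion so that dynamically shaped allocs up to
  // rank 2 (e.g. alloc(%r, %r) : memref<?x?xindex>) are also considered.
  // The size limit and index bitwidth keep their defaults.
  void buildPromotionPipeline(mlir::PassManager &pm) {
    pm.addNestedPass<mlir::FuncOp>(mlir::createPromoteBuffersToStackPass(
        /*maxAllocSizeInBytes=*/1024, /*bitwidthOfIndexType=*/64,
        /*maxRankOfAllocatedMemRef=*/2));
  }
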
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 29fe43fc0169..afad7cd5852f 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -207,17 +207,21 @@ def PromoteBuffersToStack : FunctionPass<"promote-buffers-to-stack"> {
   let description = [{
     This pass implements a simple algorithm to convert heap-based memory
     allocations to stack-based ones. It uses a built-in heuristic to decide
-    whether it makes sense to convert an allocation.
+    whether it makes sense to convert an allocation. Furthermore, dynamically
+    shaped buffers whose rank does not exceed a configurable limit can be
+    converted, provided they are considered to be small.
   }];
   let constructor = "mlir::createPromoteBuffersToStackPass()";
   let options = [
     Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
            /*default=*/"1024",
-           "Define the maximum size in bytes to promote allocations to stack.">,
+           "Maximal size in bytes to promote allocations to stack.">,
     Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
            /*default=*/"64",
-           "Define the bitwidth of the index type. Used for size estimation.">,
-
+           "Bitwidth of the index type. Used for size estimation.">,
+    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
+           /*default=*/"1",
+           "Maximal memref rank to promote dynamic buffers.">,
   ];
 }
 

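Since these tablegen options surface as pass flags, the rank limit can also be
set on the command line, e.g.
mlir-opt -promote-buffers-to-stack="max-rank-of-allocated-memref=2", as
exercised by the new RUN line in the test file below.
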
diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Transforms/BufferOptimizations.cpp
index 0693c39d3f6b..651948a9ae22 100644
--- a/mlir/lib/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Transforms/BufferOptimizations.cpp
@@ -30,10 +30,23 @@ static bool isKnownControlFlowInterface(Operation *op) {
 /// transformation is only applied to small buffers since large buffers could
 /// exceed the stack space.
 static bool isSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
-                         unsigned bitwidthOfIndexType) {
+                         unsigned bitwidthOfIndexType,
+                         unsigned maxRankOfAllocatedMemRef) {
   auto type = alloc.getType().dyn_cast<ShapedType>();
-  if (!type || !type.hasStaticShape())
+  if (!type || !alloc.getDefiningOp<AllocOp>())
     return false;
+  if (!type.hasStaticShape()) {
+    // Check whether all dynamic shape dimensions of the alloc are produced by
+    // RankOp; if so, they are likely to be small. Furthermore, the rank is
+    // limited to the configured maximum, to avoid large allocation sizes that
+    // would result from multiplying several such values.
+    if (type.getRank() <= maxRankOfAllocatedMemRef) {
+      return llvm::all_of(
+          alloc.getDefiningOp()->getOperands(),
+          [&](Value operand) { return operand.getDefiningOp<RankOp>(); });
+    }
+    return false;
+  }
  // For index types, use the provided bitwidth; the index type itself has none.
   unsigned int bitwidth = type.getElementType().isIndex()
                               ? bitwidthOfIndexType
@@ -286,7 +299,8 @@ class BufferPlacementPromotion : BufferPlacementTransformationBase {
       : BufferPlacementTransformationBase(op) {}
 
   /// Promote buffers to stack-based allocations.
-  void promote(unsigned maximumSize, unsigned bitwidthOfIndexType) {
+  void promote(unsigned maximumSize, unsigned bitwidthOfIndexType,
+               unsigned maxRankOfAllocatedMemRef) {
     for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
       Value alloc = std::get<0>(entry);
       Operation *dealloc = std::get<1>(entry);
@@ -294,8 +308,9 @@ class BufferPlacementPromotion : BufferPlacementTransformationBase {
       // The transformation is done if the allocation is limited to a given
       // size. Furthermore, a deallocation must not be defined for this
       // allocation entry and a parent allocation scope must exist.
-      if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType) || dealloc ||
-          !hasAllocationScope(alloc, aliases))
+      if (!isSmallAlloc(alloc, maximumSize, bitwidthOfIndexType,
+                        maxRankOfAllocatedMemRef) ||
+          dealloc || !hasAllocationScope(alloc, aliases))
         continue;
 
       Operation *startOperation = BufferPlacementAllocs::getStartOperation(
@@ -303,12 +318,13 @@ class BufferPlacementPromotion : BufferPlacementTransformationBase {
       // Build a new alloca that is associated with its parent
       // `AutomaticAllocationScope` determined during the initialization phase.
       OpBuilder builder(startOperation);
-      auto alloca = builder.create<AllocaOp>(
-          alloc.getLoc(), alloc.getType().cast<MemRefType>());
+      Operation *allocOp = alloc.getDefiningOp();
+      Operation *alloca = builder.create<AllocaOp>(
+          alloc.getLoc(), alloc.getType().cast<MemRefType>(),
+          allocOp->getOperands());
 
       // Replace the original alloc by a newly created alloca.
-      Operation *allocOp = alloc.getDefiningOp();
-      allocOp->replaceAllUsesWith(alloca.getOperation());
+      allocOp->replaceAllUsesWith(alloca);
       allocOp->erase();
     }
   }
@@ -347,15 +363,18 @@ struct PromoteBuffersToStackPass
     : PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
 
   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                            unsigned bitwidthOfIndexType) {
+                            unsigned bitwidthOfIndexType,
+                            unsigned maxRankOfAllocatedMemRef) {
     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
     this->bitwidthOfIndexType = bitwidthOfIndexType;
+    this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
   }
 
   void runOnFunction() override {
     // Move all allocation nodes and convert candidates into allocas.
     BufferPlacementPromotion optimizer(getFunction());
-    optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType);
+    optimizer.promote(this->maxAllocSizeInBytes, this->bitwidthOfIndexType,
+                      this->maxRankOfAllocatedMemRef);
   }
 };
 
@@ -371,7 +390,8 @@ std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
 
 std::unique_ptr<Pass>
 mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                                      unsigned bitwidthOfIndexType) {
-  return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
-                                                     bitwidthOfIndexType);
+                                      unsigned bitwidthOfIndexType,
+                                      unsigned maxRankOfAllocatedMemRef) {
+  return std::make_unique<PromoteBuffersToStackPass>(
+      maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
 }

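Taken together, the extended isSmallAlloc treats a dynamically shaped alloc as
promotable only if its rank stays within the configured limit and every size
operand is produced by a rank op. The tests below exercise all three outcomes:
@dynamicRanked is promoted under the default limit of 1, @dynamicRanked2D only
under the RANK configuration (limit 2), and @dynamicNoRank never, because its
size operand is a plain function argument.
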
diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir
index 903b1f8cb748..8fabd4fe6a01 100644
--- a/mlir/test/Transforms/promote-buffers-to-stack.mlir
+++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir
@@ -1,6 +1,7 @@
 // RUN: mlir-opt -promote-buffers-to-stack -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix DEFINDEX
 // RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=128" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix BIGINDEX
 // RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=64" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix LOWLIMIT
+// RUN: mlir-opt -promote-buffers-to-stack="max-rank-of-allocated-memref=2" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix RANK
 
 // This file checks the behavior of the PromoteBuffersToStack pass for
 // converting AllocOps into AllocaOps, if possible.
@@ -14,8 +15,6 @@
 // PromoteBuffersToStack expected behavior: It should convert %0 into an
 // AllocaOp.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @condBranch
 func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cond_br %arg0, ^bb1, ^bb2
@@ -47,8 +46,6 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // PromoteBuffersToStack expected behavior:
 // Since the alloc has dynamic type, it is not converted into an alloca.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @condBranchDynamicType
 func @condBranchDynamicType(
   %arg0: i1,
@@ -79,6 +76,41 @@ func @condBranchDynamicType(
 
 // -----
 
+// CHECK-LABEL: func @dynamicRanked
+func @dynamicRanked(%tensor: tensor<*xf32>) {
+  %0 = rank %tensor : tensor<*xf32>
+  %1 = alloc(%0) : memref<?xindex>
+  return
+}
+
+// CHECK-NEXT: %[[RANK:.*]] = rank
+// CHECK-NEXT: %[[ALLOCA:.*]] = alloca(%[[RANK]])
+
+// -----
+
+// CHECK-LABEL: func @dynamicRanked2D
+func @dynamicRanked2D(%tensor: tensor<*xf32>) {
+  %0 = rank %tensor : tensor<*xf32>
+  %1 = alloc(%0, %0) : memref<?x?xindex>
+  return
+}
+
+// CHECK-NEXT: %[[RANK:.*]] = rank
+//  RANK-NEXT: %[[ALLOC:.*]] = alloca(%[[RANK]], %[[RANK]])
+// DEFINDEX-NEXT: %[[ALLOC:.*]] = alloc(%[[RANK]], %[[RANK]])
+
+// -----
+
+// CHECK-LABEL: func @dynamicNoRank
+func @dynamicNoRank(%arg0: index) {
+  %0 = alloc(%arg0) : memref<?xindex>
+  return
+}
+
+// CHECK-NEXT: %[[ALLOC:.*]] = alloc
+
+// -----
+
 // Test Case: Existing AllocOp with no users.
 // PromoteBuffersToStack expected behavior: It should convert it to an
 // AllocaOp.
@@ -102,8 +134,6 @@ func @emptyUsesValue(%arg0: memref<4xf32>) {
 // PromoteBuffersToStack expected behavior: It should convert it into an
 // AllocaOp.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @criticalEdge
 func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cond_br %arg0, ^bb1, ^bb2(%arg1 : memref<2xf32>)
@@ -132,8 +162,6 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 //    bb2
 // PromoteBuffersToStack expected behavior: It converts the alloc in an alloca.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @invCriticalEdge
 func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -161,8 +189,6 @@ func @invCriticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 //    bb3 <- Initial position of the second AllocOp
 // PromoteBuffersToStack expected behavior: It converts the allocs into allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @ifElse
 func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -198,8 +224,6 @@ func @ifElse(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 //    bb3
 // PromoteBuffersToStack expected behavior: It converts the alloc into alloca.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @ifElseNoUsers
 func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -233,8 +257,6 @@ func @ifElseNoUsers(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // PromoteBuffersToStack expected behavior: The two allocs should be converted
 // into allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @ifElseNested
 func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -270,8 +292,6 @@ func @ifElseNested(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
 // PromoteBuffersToStack expected behavior: It converts the two AllocOps into
 // allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @redundantOperations
 func @redundantOperations(%arg0: memref<2xf32>) {
   %0 = alloc() : memref<2xf32>
@@ -299,8 +319,6 @@ func @redundantOperations(%arg0: memref<2xf32>) {
 // PromoteBuffersToStack expected behavior: Both AllocOps are converted into
 // allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @moving_alloc_and_inserting_missing_dealloc
 func @moving_alloc_and_inserting_missing_dealloc(
   %cond: i1,
@@ -335,8 +353,6 @@ func @moving_alloc_and_inserting_missing_dealloc(
 // PromoteBuffersToStack expected behavior: The AllocOps are converted into
 // allocas.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @nested_regions_and_cond_branch
 func @nested_regions_and_cond_branch(
   %arg0: i1,
@@ -373,8 +389,6 @@ func @nested_regions_and_cond_branch(
 // there is no conversion allowed. The second alloc is converted, since it
 // only remains in the scope of the function.
 
-#map0 = affine_map<(d0) -> (d0)>
-
 // CHECK-LABEL: func @memref_in_function_results
 func @memref_in_function_results(
   %arg0: memref<5xf32>,
@@ -583,4 +597,5 @@ func @indexElementType() {
 // DEFINDEX-NEXT: alloca()
 // BIGINDEX-NEXT: alloca()
 // LOWLIMIT-NEXT: alloc()
+// RANK-NEXT: alloca()
 // CHECK-NEXT: return
