[Mlir-commits] [mlir] c076fa1 - [mlir][bufferize] Deallocate returned buffers with `BufferDeallocation`

Matthias Springer llvmlistbot at llvm.org
Wed Mar 16 07:13:44 PDT 2022


Author: Matthias Springer
Date: 2022-03-16T23:13:34+09:00
New Revision: c076fa1c447bfbc05226e0782f8b8ca3a8ae78bc

URL: https://github.com/llvm/llvm-project/commit/c076fa1c447bfbc05226e0782f8b8ca3a8ae78bc
DIFF: https://github.com/llvm/llvm-project/commit/c076fa1c447bfbc05226e0782f8b8ca3a8ae78bc.diff

LOG: [mlir][bufferize] Deallocate returned buffers with `BufferDeallocation`

New buffer allocations can now be returned/yielded from blocks with `allow-return-allocs`. One-Shot Bufferize deallocates all buffers at the end of the block. If this is not possible (because the buffer escapes the block), this is now done by the existing BufferDeallocation pass.

Differential Revision: https://reviews.llvm.org/D121527

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
    mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
    mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
    mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
    mlir/include/mlir/Dialect/Linalg/Passes.td
    mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
    mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
    mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
    mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
    mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
    mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 68136fda97384..861c92d0d7686 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -516,11 +516,16 @@ LogicalResult createDealloc(OpBuilder &b, Location loc, Value allocatedBuffer,
 LogicalResult createMemCpy(OpBuilder &b, Location loc, Value from, Value to,
                            const BufferizationOptions &options);
 
-/// Finalize all buffer allocations.
-/// * Hoist buffer allocations as much as possible.
-/// * Create alloc/dealloc ops as specified by the bufferization options.
-LogicalResult finalizeBuffers(Operation *op,
-                              const BufferizationOptions &options);
+/// Try to hoist all new buffer allocations until the next hoisting barrier.
+LogicalResult hoistBufferAllocations(Operation *op,
+                                     const BufferizationOptions &options);
+
+/// Create alloc/dealloc ops as specified in the bufferization options. If
+/// `onlyLeakingAlloc`, only those buffer allocations are processed for which no
+/// buffer deallocation can be created.
+LogicalResult createAllocDeallocOps(Operation *op,
+                                    const BufferizationOptions &options,
+                                    bool onlyLeakingAllocs = false);
 } // namespace bufferization
 } // namespace mlir
 

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
index 85a70a42d283a..be87bab0013e2 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
@@ -90,6 +90,12 @@ BufferizationOptions getPartialBufferizationOptions();
 /// Note: This function overload is useful for extending the bufferization.
 LogicalResult bufferizeOp(Operation *op,
                           BufferizationState &bufferizationState);
+
+/// Finalize all buffer allocations.
+/// * Hoist buffer allocations as much as possible.
+/// * Create alloc/dealloc ops as specified by the bufferization options.
+LogicalResult finalizeBuffers(Operation *op,
+                              const BufferizationOptions &options);
 } // namespace bufferization
 } // namespace mlir
 

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index bff0b197544af..164c8106909c5 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -15,6 +15,9 @@ struct OneShotBufferizationOptions;
 /// buffers.
 std::unique_ptr<Pass> createBufferDeallocationPass();
 
+/// Run buffer deallocation.
+LogicalResult deallocateBuffers(Operation *op);
+
 /// Creates a pass that moves allocations upwards to reduce the number of
 /// required copies that are inserted during the BufferDeallocation pass.
 std::unique_ptr<Pass> createBufferHoistingPass();
@@ -55,6 +58,9 @@ createPromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc);
 // Registration
 //===----------------------------------------------------------------------===//
 
+/// Register external models for AllocationOpInterface.
+void registerAllocationOpInterfaceExternalModels(DialectRegistry &registry);
+
 /// Generate the code for registering passes.
 #define GEN_PASS_REGISTRATION
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"

diff  --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 0a7a7f7a32611..a5970ee7b5a01 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -170,11 +170,11 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
     example, `tensor.generate` is not in destination-passing style and always
     results in a new buffer allocation.
 
-    One-Shot Bufferize deallocates all buffers that it allocates. Yielding newly
-    allocated buffers from a block is not supported yet and such IR will be
-    rejected. For testing purposes and compatibility with partial bufferization,
-    One-Shot Bufferize can be run with `allow-return-allocs=1 create-dealloc=0`
-    to allow such IR.
+    One-Shot Bufferize deallocates all buffers that it allocates. Returning or
+    yielding newly allocated buffers from a block can lead to bad performance
+    because additional buffer copies would be inserted. By default, such IR is
+    rejected by One-Shot Bufferize. If performance is not important, such IR can
+    be allowed with `allow-return-allocs=1`.
 
     One-Shot Bufferize will by default reject IR that contains non-bufferizable
     op, i.e., ops that do not implemement BufferizableOpInterface. Such IR can
@@ -204,7 +204,7 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
   let options = [
     Option<"allowReturnAllocs", "allow-return-allocs", "bool",
             /*default=*/"false",
-           "Allows the return of new allocations (for testing purposes only)">,
+           "Allows returning/yielding new allocations from a block.">,
     Option<"allowUnknownOps", "allow-unknown-ops", "bool",
            /*default=*/"false",
            "Allows unknown (not bufferizable) ops in the input IR.">,

diff  --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index ee2516e9d47a4..b84b99722fa7a 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -45,7 +45,7 @@ def LinalgComprehensiveModuleBufferize :
            "Annotates IR with RaW conflicts. Requires test-analysis-only.">,
     Option<"allowReturnAllocs", "allow-return-allocs", "bool",
             /*default=*/"false",
-           "Allows the return of new allocations (for testing purposes only)">,
+           "Allows returning/yielding new allocations from a block.">,
     Option<"allowUnknownOps", "allow-unknown-ops", "bool",
            /*default=*/"false",
            "Allows unknown (not bufferizable) ops in the input IR.">,

diff  --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 7d21c76d58502..11a3f068d6b4d 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -508,8 +508,10 @@ LogicalResult bufferization::createMemCpy(OpBuilder &b, Location loc,
   return success();
 }
 
-static LogicalResult
-createAllocDeallocOps(Operation *op, const BufferizationOptions &options) {
+LogicalResult
+bufferization::createAllocDeallocOps(Operation *op,
+                                     const BufferizationOptions &options,
+                                     bool onlyLeakingAllocs) {
   IRRewriter rewriter(op->getContext());
 
   // Bufferization creates memref.alloca ops. After bufferization, these must be
@@ -518,7 +520,11 @@ createAllocDeallocOps(Operation *op, const BufferizationOptions &options) {
     // Ignore memref.alloca ops that were not created by the bufferization.
     if (!allocaOp->hasAttr(kBufferAllocationAttr))
       return WalkResult::skip();
+    // If `onlyLeakingAllocs`, process only ops that are marked as
+    // "skip dealloc".
     bool skipDealloc = allocaOp->hasAttr(kSkipDeallocAttr);
+    if (onlyLeakingAllocs && !skipDealloc)
+      return WalkResult::skip();
 
     // Create alloc.
     Block *block = allocaOp->getBlock();
@@ -547,8 +553,9 @@ createAllocDeallocOps(Operation *op, const BufferizationOptions &options) {
 
 /// Try to hoist all new buffer allocations until the next hoisting barrier.
 // TODO: Consolidate this function with the existing buffer hoisting pass.
-static LogicalResult
-hoistBufferAllocations(Operation *op, const BufferizationOptions &options) {
+LogicalResult
+bufferization::hoistBufferAllocations(Operation *op,
+                                      const BufferizationOptions &options) {
   // Nothing to do if allocation hoisting is deactivated.
   if (!options.hoistAllocations)
     return success();
@@ -601,17 +608,6 @@ hoistBufferAllocations(Operation *op, const BufferizationOptions &options) {
   return success();
 }
 
-LogicalResult
-bufferization::finalizeBuffers(Operation *op,
-                               const BufferizationOptions &options) {
-  if (failed(hoistBufferAllocations(op, options)))
-    return failure();
-  if (failed(createAllocDeallocOps(op, options)))
-    return failure();
-
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // Bufferization-specific BlockAndValueMapping support with debugging.
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
index 6d04dd4e92e0d..63073dd1e9ebd 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
@@ -77,13 +77,15 @@ walkReturnOperations(Region *region,
   return success();
 }
 
-/// Checks if all operations in a given region that have at least one attached
-/// region implement the RegionBranchOpInterface. This is not required in edge
-/// cases, where we have a single attached region and the parent operation has
-/// no results.
-static bool validateSupportedControlFlow(Region &region) {
-  bool success = true;
-  region.walk([&success](Operation *operation) {
+/// Checks if all operations that have at least one attached region implement
+/// the RegionBranchOpInterface. This is not required in edge cases, where we
+/// have a single attached region and the parent operation has no results.
+static bool validateSupportedControlFlow(Operation *op) {
+  WalkResult result = op->walk([&](Operation *operation) {
+    // Only check ops that are inside a function.
+    if (!operation->getParentOfType<FuncOp>())
+      return WalkResult::advance();
+
     auto regions = operation->getRegions();
     // Walk over all operations in a region and check if the operation has at
     // least one region and implements the RegionBranchOpInterface. If there
@@ -96,10 +98,11 @@ static bool validateSupportedControlFlow(Region &region) {
         !dyn_cast<RegionBranchOpInterface>(operation)) {
       operation->emitError("All operations with attached regions need to "
                            "implement the RegionBranchOpInterface.");
-      success = false;
     }
+
+    return WalkResult::advance();
   });
-  return success;
+  return !result.wasSkipped();
 }
 
 namespace {
@@ -639,7 +642,7 @@ struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<bufferization::BufferizationDialect>();
     registry.insert<memref::MemRefDialect>();
-    registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>();
+    registerAllocationOpInterfaceExternalModels(registry);
   }
 
   void runOnOperation() override {
@@ -647,32 +650,53 @@ struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
     if (func.isExternal())
       return;
 
-    // Ensure that there are supported loops only.
-    Backedges backedges(func);
-    if (backedges.size()) {
-      func.emitError("Only structured control-flow loops are supported.");
-      return signalPassFailure();
-    }
-
-    // Check that the control flow structures are supported.
-    if (!validateSupportedControlFlow(func.getRegion()))
-      return signalPassFailure();
+    if (failed(deallocateBuffers(func)))
+      signalPassFailure();
+  }
+};
 
-    // Gather all required allocation nodes and prepare the deallocation phase.
-    BufferDeallocation deallocation(func);
+} // namespace
 
-    // Check for supported AllocationOpInterface implementations and prepare the
-    // internal deallocation pass.
-    if (failed(deallocation.prepare()))
-      return signalPassFailure();
+LogicalResult bufferization::deallocateBuffers(Operation *op) {
+  if (isa<ModuleOp>(op)) {
+    WalkResult result = op->walk([&](FuncOp funcOp) {
+      if (failed(deallocateBuffers(funcOp)))
+        return WalkResult::interrupt();
+      return WalkResult::advance();
+    });
+    return success(!result.wasInterrupted());
+  }
 
-    // Place all required temporary clone and dealloc nodes.
-    if (failed(deallocation.deallocate()))
-      return signalPassFailure();
+  // Ensure that there are supported loops only.
+  Backedges backedges(op);
+  if (backedges.size()) {
+    op->emitError("Only structured control-flow loops are supported.");
+    return failure();
   }
-};
 
-} // namespace
+  // Check that the control flow structures are supported.
+  if (!validateSupportedControlFlow(op))
+    return failure();
+
+  // Gather all required allocation nodes and prepare the deallocation phase.
+  BufferDeallocation deallocation(op);
+
+  // Check for supported AllocationOpInterface implementations and prepare the
+  // internal deallocation pass.
+  if (failed(deallocation.prepare()))
+    return failure();
+
+  // Place all required temporary clone and dealloc nodes.
+  if (failed(deallocation.deallocate()))
+    return failure();
+
+  return success();
+}
+
+void bufferization::registerAllocationOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addOpInterface<memref::AllocOp, DefaultAllocationInterface>();
+}
 
 //===----------------------------------------------------------------------===//
 // BufferDeallocationPass construction

diff  --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 941b61ed722e1..6a07a40302c0d 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -157,7 +157,9 @@ struct OneShotBufferizePass
       : options(options) {}
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<bufferization::BufferizationDialect>();
+    registry
+        .insert<bufferization::BufferizationDialect, memref::MemRefDialect>();
+    registerAllocationOpInterfaceExternalModels(registry);
   }
 
   void runOnOperation() override {
@@ -299,6 +301,21 @@ checkBufferizationResult(Operation *op, const BufferizationOptions &options) {
   return success();
 }
 
+LogicalResult
+bufferization::finalizeBuffers(Operation *op,
+                               const BufferizationOptions &options) {
+  if (failed(hoistBufferAllocations(op, options)))
+    return failure();
+  if (failed(createAllocDeallocOps(op, options, /*onlyLeakingAllocs=*/true)))
+    return failure();
+  if (options.createDeallocs && failed(deallocateBuffers(op)))
+    return failure();
+  if (failed(createAllocDeallocOps(op, options)))
+    return failure();
+
+  return success();
+}
+
 LogicalResult bufferization::bufferizeOp(Operation *op,
                                          const AnalysisState &analysisState) {
   BufferizationState bufferizationState(analysisState);

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 47e0600b268fe..d299f7deea66e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h"
 #include "mlir/Dialect/Linalg/Passes.h"
@@ -51,6 +52,7 @@ struct LinalgComprehensiveModuleBufferize
                 vector::VectorDialect, scf::SCFDialect,
                 arith::ArithmeticDialect, func::FuncDialect, AffineDialect>();
     arith::registerBufferizableOpInterfaceExternalModels(registry);
+    bufferization::registerAllocationOpInterfaceExternalModels(registry);
     linalg::registerBufferizableOpInterfaceExternalModels(registry);
     scf::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerModuleBufferizationExternalModels(registry);

diff  --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
index 078f7296fcf7a..df03c75190dd7 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
@@ -21,12 +21,13 @@ func @buffer_not_deallocated(%t : tensor<?xf32>, %c : i1) -> tensor<?xf32> {
   } else {
   // CHECK: } else {
     // CHECK: %[[m:.*]] = bufferization.to_memref %[[t]]
-    // CHECK: scf.yield %[[m]]
+    // CHECK: %[[cloned:.*]] = bufferization.clone %[[m]]
+    // CHECK: scf.yield %[[cloned]]
     scf.yield %t : tensor<?xf32>
   }
   // CHECK: }
-  // CHECK-NOT: dealloc
   // CHECK: %[[r_tensor:.*]] = bufferization.to_tensor %[[r]]
+  // CHECK: memref.dealloc %[[r]]
   // CHECK: return %[[r_tensor]]
   return %r : tensor<?xf32>
 }

diff  --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index c4ca38ab55d84..061376dfd36ae 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -81,7 +81,8 @@ func @not_inplace(
   //     CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[ALLOC]] : memref<?xf32>)
   %r = linalg.fill ins(%f0 : f32) outs(%A : tensor<?xf32>) -> tensor<?xf32>
 
-  //     CHECK:  return %[[ALLOC]] : memref<?xf32>
+  // CHECK-NOT: dealloc
+  //     CHECK: return %[[ALLOC]] : memref<?xf32>
   return %r: tensor<?xf32>
 }
 
@@ -110,6 +111,7 @@ func @not_inplace(
                      outs(%A: tensor<?x?xf32>)
     -> tensor<?x?xf32>
 
+  //     CHECK: memref.dealloc %[[ALLOC]]
   //     CHECK: return
   // CHECK-NOT: tensor
   return %r: tensor<?x?xf32>
@@ -124,6 +126,7 @@ func @not_inplace(%A : tensor<?x?xf32> {linalg.inplaceable = true}) -> tensor<?x
   %r = linalg.matmul  ins(%A, %A: tensor<?x?xf32>, tensor<?x?xf32>)
                      outs(%A: tensor<?x?xf32>)
     -> tensor<?x?xf32>
+  // CHECK-NOT: dealloc
   return %r: tensor<?x?xf32>
 }
 // -----
@@ -328,6 +331,7 @@ func @scf_for_yield_only(%A : tensor<?xf32> {linalg.inplaceable = false},
   }
 
   //     CHECK:   return %[[CASTED]] : memref<?xf32, #[[$map_1d_dyn]]>
+  // CHECK-NOT:   dealloc
   return %r0, %r1: tensor<?xf32>, tensor<?xf32>
 }
 
@@ -419,6 +423,7 @@ func @main() {
 //      CHECK:   call @some_external_func(%[[B]]) : (memref<4xi32, #[[$DYN_1D_MAP]]>) -> ()
   call @some_external_func(%A) : (tensor<4xi32>) -> ()
 
+//      CHECK: memref.dealloc %[[alloc]]
   return
 }
 
@@ -444,6 +449,7 @@ func @main() {
     scf.yield
   }
 
+//      CHECK:   memref.dealloc %[[alloc]]
   return
 }
 
@@ -1192,3 +1198,23 @@ func @rank_reducing(
   }
   return %5: tensor<?x1x6x8xf32>
 }
+
+// -----
+
+// Note: This bufferization is inefficient, but it bufferizes correctly.
+
+// CHECK-LABEL: func @scf_execute_region_yield_non_equivalent(
+//       CHECK:   %[[alloc:.*]] = memref.alloc(%{{.*}})
+//       CHECK:   %[[clone:.*]] = bufferization.clone %[[alloc]]
+//       CHECK:   memref.dealloc %[[alloc]]
+//       CHECK:   %[[r:.*]] = memref.load %[[clone]][%{{.*}}]
+//       CHECK:   memref.dealloc %[[clone]]
+//       CHECK:   return %[[r]]
+func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 {
+  %r = scf.execute_region -> (tensor<?xf32>) {
+    %t2 = linalg.init_tensor [%i] : tensor<?xf32>
+    scf.yield %t2 : tensor<?xf32>
+  }
+  %f = tensor.extract %r[%j] : tensor<?xf32>
+  return %f : f32
+}


        


More information about the Mlir-commits mailing list