[Mlir-commits] [mlir] [mlir][bufferization] BufferDeallocation: support unstructured control flow loops (PR #66657)

Martin Erhart llvmlistbot at llvm.org
Mon Sep 18 08:31:19 PDT 2023


https://github.com/maerhart created https://github.com/llvm/llvm-project/pull/66657

This commit adds support for any kind of unstructured control flow loops.

Depends on #66626
Only review the top commit.

>From 1647897741df4bd162d2cefd02331e2dc5ae9f29 Mon Sep 17 00:00:00 2001
From: Martin Erhart <merhart at google.com>
Date: Mon, 18 Sep 2023 10:18:31 +0000
Subject: [PATCH 1/2] [mlir][bufferization] Add
 OwnershipBasedBufferDeallocation pass option to forbid clones

Adds a pass option to the `ownership-based-buffer-deallocation` pass to
forbid insertion of clone operations. This is necessary to support IR
that does not have the property that every buffer write dominates every
buffer read to the same buffer. Instead of silently producing invalid
IR, the pass would then emit an error.
This is a restriction in the old `buffer-deallocation` pass, but the new
function boundary ABI was not enforced in this old pass. Having this
This option eases migration from the old to the new deallocation pass:
when enabled, the new deallocation pass can fix IR that does not adhere
to the function boundary ABI (in some situations).
---
 .../IR/BufferDeallocationOpInterface.h        |  13 +-
 .../IR/BufferDeallocationOpInterface.td       |   4 +-
 .../Dialect/Bufferization/Pipelines/Passes.h  |  17 ++
 .../Dialect/Bufferization/Transforms/Passes.h |   8 +-
 .../Bufferization/Transforms/Passes.td        |   8 +
 .../BufferDeallocationOpInterfaceImpl.cpp     |   6 +-
 .../IR/BufferDeallocationOpInterface.cpp      |  15 +-
 .../Pipelines/BufferizationPipelines.cpp      |  14 +-
 .../Bufferization/Pipelines/CMakeLists.txt    |   1 +
 .../OwnershipBasedBufferDeallocation.cpp      |  85 ++++---
 .../dealloc-function-boundaries.mlir          |   8 +-
 .../dealloc-memoryeffect-interface.mlir       |  10 +-
 .../dealloc-region-branchop-interface.mlir    | 213 +++++++++++++-----
 .../Linalg/CPU/test-collapse-tensor.mlir      |   2 +-
 .../Linalg/CPU/test-expand-tensor.mlir        |   2 +-
 .../Dialect/Linalg/CPU/test-tensor-e2e.mlir   |   2 +-
 .../llvm-project-overlay/mlir/BUILD.bazel     |   1 +
 17 files changed, 305 insertions(+), 104 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
index 7ac4592de7875fb..3aa61fae8c6caee 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
@@ -96,6 +96,14 @@ struct DeallocationOptions {
   // pass the ownership of MemRef values instead of adhering to the function
   // boundary ABI.
   bool privateFuncDynamicOwnership = false;
+
+  // Allows the pass to insert `bufferization.clone` operations. This is useful
+  // for supporting IR that does not adhere to the function boundary ABI
+  // initially (excl. external functions) and to support operations with results
+  // with 'Unknown' ownership. However, it requires that all buffer writes
+  // dominate all buffer reads (i.e., only enable this option if your IR is
+  // guaranteed to have this property).
+  bool allowCloning = false;
 };
 
 /// This class collects all the state that we need to perform the buffer
@@ -142,8 +150,9 @@ class DeallocationState {
   /// a new SSA value, returned as the first element of the pair, which has
   /// 'Unique' ownership and can be used instead of the passed Value with
   /// the ownership indicator returned as the second element of the pair.
-  std::pair<Value, Value>
-  getMemrefWithUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
+  FailureOr<std::pair<Value, Value>>
+  getMemrefWithUniqueOwnership(const DeallocationOptions &options,
+                               OpBuilder &builder, Value memref, Block *block);
 
   /// Given two basic blocks and the values passed via block arguments to the
   /// destination block, compute the list of MemRefs that have to be retained in
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td
index 3e11432c65c5f08..3b9a9c3f4fef667 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td
@@ -56,7 +56,7 @@ def BufferDeallocationOpInterface :
           method (which is especially important if operations are created that
           cannot be easily canonicalized away anymore).
         }],
-        /*retType=*/"std::pair<Value, Value>",
+        /*retType=*/"FailureOr<std::pair<Value, Value>>",
         /*methodName=*/"materializeUniqueOwnershipForMemref",
         /*args=*/(ins "DeallocationState &":$state,
                       "const DeallocationOptions &":$options,
@@ -65,7 +65,7 @@ def BufferDeallocationOpInterface :
         /*methodBody=*/[{}],
         /*defaultImplementation=*/[{
           return state.getMemrefWithUniqueOwnership(
-            builder, memref, memref.getParentBlock());
+            options, builder, memref, memref.getParentBlock());
         }]>,
   ];
 }
diff --git a/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h
index 7acacb763cd2c18..7500257ed95eac8 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h
@@ -17,6 +17,7 @@
 
 namespace mlir {
 namespace bufferization {
+struct DeallocationOptions;
 
 /// Options for the buffer deallocation pipeline.
 struct BufferDeallocationPipelineOptions
@@ -28,6 +29,22 @@ struct BufferDeallocationPipelineOptions
           "dynamically pass ownership of memrefs to callees. This can enable "
           "earlier deallocations."),
       llvm::cl::init(false)};
+  PassOptions::Option<bool> allowCloning{
+      *this, "allow-cloning",
+      llvm::cl::desc(
+          "Allows the pass to insert `bufferization.clone` operations. This is "
+          "useful for supporting IR that does not adhere to the function "
+          "boundary ABI initially (excl. external functions) and to support "
+          "operations with results with 'Unknown' ownership. However, it "
+          "requires that all buffer writes dominate all buffer reads (i.e., "
+          "only enable this option if your IR is guaranteed to have this "
+          "property)."),
+      llvm::cl::init(false)};
+
+  /// Convert this BufferDeallocationPipelineOptions struct to a
+  /// DeallocationOptions struct to be passed to the
+  /// OwnershipBasedBufferDeallocationPass.
+  DeallocationOptions asDeallocationOptions() const;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 92520eb13da6875..37a3942f7bac6c5 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -1,6 +1,7 @@
 #ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES_H
 #define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES_H
 
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -31,7 +32,7 @@ std::unique_ptr<Pass> createBufferDeallocationPass();
 /// Creates an instance of the OwnershipBasedBufferDeallocation pass to free all
 /// allocated buffers.
 std::unique_ptr<Pass> createOwnershipBasedBufferDeallocationPass(
-    bool privateFuncDynamicOwnership = false);
+    const DeallocationOptions &options = DeallocationOptions());
 
 /// Creates a pass that optimizes `bufferization.dealloc` operations. For
 /// example, it reduces the number of alias checks needed at runtime using
@@ -134,8 +135,9 @@ func::FuncOp buildDeallocationLibraryFunction(OpBuilder &builder, Location loc,
 LogicalResult deallocateBuffers(Operation *op);
 
 /// Run ownership-based buffer deallocation.
-LogicalResult deallocateBuffersOwnershipBased(FunctionOpInterface op,
-                                              bool privateFuncDynamicOwnership);
+LogicalResult deallocateBuffersOwnershipBased(
+    FunctionOpInterface op,
+    const DeallocationOptions &options = DeallocationOptions());
 
 /// Creates a pass that moves allocations upwards to reduce the number of
 /// required copies that are inserted during the BufferDeallocation pass.
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 62383e376f6f7a3..5b8af7a975c34b5 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -223,6 +223,14 @@ def OwnershipBasedBufferDeallocation : Pass<
            "Allows to add additional arguments to private functions to "
            "dynamically pass ownership of memrefs to callees. This can enable "
            "earlier deallocations.">,
+    Option<"allowCloning", "allow-cloning", "bool", /*default=*/"false",
+           "Allows the pass to insert `bufferization.clone` operations. This "
+           "is useful for supporting IR that does not adhere to the function "
+           "boundary ABI initially (excl. external functions) and to support "
+           "operations with results with 'Unknown' ownership. However, it "
+           "requires that all buffer writes dominate all buffer reads (i.e., "
+           "only enable this option if your IR is guaranteed to have this "
+           "property).">,
   ];
   let constructor = "mlir::bufferization::createOwnershipBasedBufferDeallocationPass()";
 
diff --git a/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp
index f2e7732e8ea4aa3..8ab4717739a7643 100644
--- a/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp
@@ -53,7 +53,7 @@ struct SelectOpInterface
     return op; // nothing to do
   }
 
-  std::pair<Value, Value>
+  FailureOr<std::pair<Value, Value>>
   materializeUniqueOwnershipForMemref(Operation *op, DeallocationState &state,
                                       const DeallocationOptions &options,
                                       OpBuilder &builder, Value value) const {
@@ -64,14 +64,14 @@ struct SelectOpInterface
     Block *block = value.getParentBlock();
     if (!state.getOwnership(selectOp.getTrueValue(), block).isUnique() ||
         !state.getOwnership(selectOp.getFalseValue(), block).isUnique())
-      return state.getMemrefWithUniqueOwnership(builder, value,
+      return state.getMemrefWithUniqueOwnership(options, builder, value,
                                                 value.getParentBlock());
 
     Value ownership = builder.create<arith::SelectOp>(
         op->getLoc(), selectOp.getCondition(),
         state.getOwnership(selectOp.getTrueValue(), block).getIndicator(),
         state.getOwnership(selectOp.getFalseValue(), block).getIndicator());
-    return {selectOp.getResult(), ownership};
+    return std::make_pair(selectOp.getResult(), ownership);
   }
 };
 
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
index 407d75e2426e9f9..9ac2e09dec385aa 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
@@ -132,16 +132,21 @@ void DeallocationState::getLiveMemrefsIn(Block *block,
   memrefs.append(liveMemrefs);
 }
 
-std::pair<Value, Value>
-DeallocationState::getMemrefWithUniqueOwnership(OpBuilder &builder,
-                                                Value memref, Block *block) {
+FailureOr<std::pair<Value, Value>>
+DeallocationState::getMemrefWithUniqueOwnership(
+    const DeallocationOptions &options, OpBuilder &builder, Value memref,
+    Block *block) {
   auto iter = ownershipMap.find({memref, block});
   assert(iter != ownershipMap.end() &&
          "Value must already have been registered in the ownership map");
 
   Ownership ownership = iter->second;
   if (ownership.isUnique())
-    return {memref, ownership.getIndicator()};
+    return std::make_pair(memref, ownership.getIndicator());
+
+  if (!options.allowCloning)
+    return emitError(memref.getLoc(),
+                     "MemRef value does not have valid ownership");
 
   // Instead of inserting a clone operation we could also insert a dealloc
   // operation earlier in the block and use the updated ownerships returned by
@@ -155,7 +160,7 @@ DeallocationState::getMemrefWithUniqueOwnership(OpBuilder &builder,
   Value newMemref = cloneOp.getResult();
   updateOwnership(newMemref, condition);
   memrefsToDeallocatePerBlock[newMemref.getParentBlock()].push_back(newMemref);
-  return {newMemref, condition};
+  return std::make_pair(newMemref, condition);
 }
 
 void DeallocationState::getMemrefsToRetain(
diff --git a/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp b/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp
index b2a60feb9a7f011..f08de33345ce605 100644
--- a/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp
+++ b/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp
@@ -8,23 +8,35 @@
 
 #include "mlir/Dialect/Bufferization/Pipelines/Passes.h"
 
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
 
+using namespace mlir;
+using namespace bufferization;
+
 //===----------------------------------------------------------------------===//
 // Pipeline implementation.
 //===----------------------------------------------------------------------===//
 
+DeallocationOptions
+BufferDeallocationPipelineOptions::asDeallocationOptions() const {
+  DeallocationOptions opts;
+  opts.privateFuncDynamicOwnership = privateFunctionDynamicOwnership.getValue();
+  opts.allowCloning = allowCloning.getValue();
+  return opts;
+}
+
 void mlir::bufferization::buildBufferDeallocationPipeline(
     OpPassManager &pm, const BufferDeallocationPipelineOptions &options) {
   pm.addNestedPass<func::FuncOp>(
       memref::createExpandReallocPass(/*emitDeallocs=*/false));
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createOwnershipBasedBufferDeallocationPass(
-      options.privateFunctionDynamicOwnership.getValue()));
+      options.asDeallocationOptions()));
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createBufferDeallocationSimplificationPass());
   pm.addPass(createLowerDeallocationsPass());
diff --git a/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt
index 6e8dab64ba6b935..d67b28b308fa10e 100644
--- a/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRBufferizationPipelines
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization
 
   LINK_LIBS PUBLIC
+  MLIRBufferizationDialect
   MLIRBufferizationTransforms
   MLIRMemRefTransforms
   MLIRFuncDialect
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
index 43ba11cf132cb92..bc76d6fabfc3f22 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
@@ -139,10 +139,8 @@ namespace {
 /// program have a corresponding de-allocation.
 class BufferDeallocation {
 public:
-  BufferDeallocation(Operation *op, bool privateFuncDynamicOwnership)
-      : state(op) {
-    options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;
-  }
+  BufferDeallocation(Operation *op, DeallocationOptions options)
+      : state(op), options(options) {}
 
   /// Performs the actual placement/creation of all dealloc operations.
   LogicalResult deallocate(FunctionOpInterface op);
@@ -376,8 +374,9 @@ class BufferDeallocation {
   /// Given an SSA value of MemRef type, returns the same or a new SSA value
   /// which has 'Unique' ownership where the ownership indicator is guaranteed
   /// to be always 'true'.
-  Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
-                                                 Value memref, Block *block);
+  FailureOr<Value> materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
+                                                            Value memref,
+                                                            Block *block);
 
   /// Returns whether the given operation implements FunctionOpInterface, has
   /// private visibility, and the private-function-dynamic-ownership pass option
@@ -391,7 +390,7 @@ class BufferDeallocation {
   /// is requested does not match the block in which 'memref' is defined, the
   /// default implementation in
   /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
-  std::pair<Value, Value>
+  FailureOr<std::pair<Value, Value>>
   materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
 
   /// Checks all the preconditions for operations implementing the
@@ -430,7 +429,7 @@ class BufferDeallocation {
   DeallocationState state;
 
   /// Collects all pass options in a single place.
-  DeallocationOptions options;
+  const DeallocationOptions options;
 };
 
 } // namespace
@@ -439,13 +438,13 @@ class BufferDeallocation {
 // BufferDeallocation Implementation
 //===----------------------------------------------------------------------===//
 
-std::pair<Value, Value>
+FailureOr<std::pair<Value, Value>>
 BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
                                                Block *block) {
   // The interface can only materialize ownership indicators in the same block
   // as the defining op.
   if (memref.getParentBlock() != block)
-    return state.getMemrefWithUniqueOwnership(builder, memref, block);
+    return state.getMemrefWithUniqueOwnership(options, builder, memref, block);
 
   Operation *owner = memref.getDefiningOp();
   if (!owner)
@@ -458,7 +457,7 @@ BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
         state, options, builder, memref);
 
   // Otherwise use the default implementation.
-  return state.getMemrefWithUniqueOwnership(builder, memref, block);
+  return state.getMemrefWithUniqueOwnership(options, builder, memref, block);
 }
 
 static bool regionOperatesOnMemrefValues(Region &region) {
@@ -710,13 +709,17 @@ BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
   return newOp.getOperation();
 }
 
-Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
+FailureOr<Value> BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
     OpBuilder &builder, Value memref, Block *block) {
   // First, make sure we at least have 'Unique' ownership already.
-  std::pair<Value, Value> newMemrefAndOnwership =
+  FailureOr<std::pair<Value, Value>> newMemrefAndOwnership =
       materializeUniqueOwnership(builder, memref, block);
-  Value newMemref = newMemrefAndOnwership.first;
-  Value condition = newMemrefAndOnwership.second;
+
+  if (failed(newMemrefAndOwnership))
+    return failure();
+
+  Value newMemref = newMemrefAndOwnership->first;
+  Value condition = newMemrefAndOwnership->second;
 
   // Avoid inserting additional IR if ownership is already guaranteed. In
   // particular, this is already the case when we had 'Unknown' ownership
@@ -817,8 +820,13 @@ FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
         newOperands.push_back(operand);
         continue;
       }
-      auto [memref, condition] =
+      FailureOr<std::pair<Value, Value>> memrefAndCondition =
           materializeUniqueOwnership(builder, operand, op->getBlock());
+
+      if (failed(memrefAndCondition))
+        return failure();
+
+      auto [memref, condition] = *memrefAndCondition;
       newOperands.push_back(memref);
       ownershipIndicatorsToAdd.push_back(condition);
     }
@@ -901,8 +909,28 @@ BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
       if (!isMemref(val.get()))
         continue;
 
-      val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
-                                                       op->getBlock()));
+      if (options.allowCloning) {
+        // Here we assume that all memref write operations dominate all memref
+        // read operations, but the function boundary ABI of non-external
+        // functions does not necessarily have to be adhered to.
+        FailureOr<Value> newMemref = materializeMemrefWithGuaranteedOwnership(
+            builder, val.get(), op->getBlock());
+
+        if (failed(newMemref))
+          return failure();
+
+        val.set(*newMemref);
+      } else {
+        // Here memref writes don't have to dominate reads, but the function
+        // boundary ABI has to be adhered to from the start.
+        FailureOr<std::pair<Value, Value>> newMemref =
+            materializeUniqueOwnership(builder, val.get(), op->getBlock());
+
+        if (failed(newMemref))
+          return failure();
+
+        val.set(newMemref->first);
+      }
     }
   }
 
@@ -995,17 +1023,21 @@ struct OwnershipBasedBufferDeallocationPass
     : public bufferization::impl::OwnershipBasedBufferDeallocationBase<
           OwnershipBasedBufferDeallocationPass> {
   OwnershipBasedBufferDeallocationPass() = default;
-  OwnershipBasedBufferDeallocationPass(bool privateFuncDynamicOwnership)
+  OwnershipBasedBufferDeallocationPass(const DeallocationOptions &options)
       : OwnershipBasedBufferDeallocationPass() {
-    this->privateFuncDynamicOwnership.setValue(privateFuncDynamicOwnership);
+    privateFuncDynamicOwnership.setValue(options.privateFuncDynamicOwnership);
+    allowCloning.setValue(options.allowCloning);
   }
   void runOnOperation() override {
     func::FuncOp func = getOperation();
     if (func.isExternal())
       return;
 
-    if (failed(
-            deallocateBuffersOwnershipBased(func, privateFuncDynamicOwnership)))
+    DeallocationOptions options;
+    options.privateFuncDynamicOwnership =
+        privateFuncDynamicOwnership.getValue();
+    options.allowCloning = allowCloning.getValue();
+    if (failed(deallocateBuffersOwnershipBased(func, options)))
       signalPassFailure();
   }
 };
@@ -1017,9 +1049,9 @@ struct OwnershipBasedBufferDeallocationPass
 //===----------------------------------------------------------------------===//
 
 LogicalResult bufferization::deallocateBuffersOwnershipBased(
-    FunctionOpInterface op, bool privateFuncDynamicOwnership) {
+    FunctionOpInterface op, const DeallocationOptions &options) {
   // Gather all required allocation nodes and prepare the deallocation phase.
-  BufferDeallocation deallocation(op, privateFuncDynamicOwnership);
+  BufferDeallocation deallocation(op, options);
 
   // Place all required temporary clone and dealloc nodes.
   return deallocation.deallocate(op);
@@ -1031,7 +1063,6 @@ LogicalResult bufferization::deallocateBuffersOwnershipBased(
 
 std::unique_ptr<Pass>
 mlir::bufferization::createOwnershipBasedBufferDeallocationPass(
-    bool privateFuncDynamicOwnership) {
-  return std::make_unique<OwnershipBasedBufferDeallocationPass>(
-      privateFuncDynamicOwnership);
+    const DeallocationOptions &options) {
+  return std::make_unique<OwnershipBasedBufferDeallocationPass>(options);
 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir
index 13c55d0289880ef..387df9aaa688ae5 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt --allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=false \
+// RUN: mlir-opt --allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=allow-cloning=true \
 // RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
 // RUN: mlir-opt --allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true \
 // RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s --check-prefix=CHECK-DYNAMIC
@@ -92,6 +92,12 @@ func.func private @redundantOperations(%arg0: memref<2xf32>) {
 // since they are operands of return operation and should escape from
 // deallocating. It should dealloc %y after CopyOp.
 
+// Note: when dynamic ownership is disabled, we need to allow cloning in this 
+// example because a function argument is returned again which is against the
+// function boundary ABI. Buffer deallocation will fix this by inserting an
+// additional clone operation, but as a prerequisite all buffer writes have to
+// dominate all buffer reads.
+
 func.func private @memref_in_function_results(
   %arg0: memref<5xf32>,
   %arg1: memref<10xf32>,
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
index 44cf16385603e07..93c7c7b23775202 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation \
+// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=allow-cloning=true \
 // RUN:   --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
 // RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null
 
@@ -110,6 +110,14 @@ func.func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64
 
 // -----
 
+// Note: memref.get_global does not provide ownership of the memref it returns
+// because a global constant must not be deallocated. However, the function
+// boundary ABI requires to return ownership for function results. Enabling
+// "allow-cloning" fixes this issue automatically but requires buffer writes to
+// dominate all buffer reads (not just for this memref but for all of them in
+// the IR). Otherwise, a clone operation has to be inserted manually before
+// running buffer deallocation.
+
 memref.global "private" constant @__constant_4xf32 : memref<4xf32> = dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]>
 
 func.func @op_without_aliasing_and_allocation() -> memref<4xf32> {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir
index dc372749fc074be..bac43d4109fb2fb 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir
@@ -1,6 +1,8 @@
 // RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation \
 // RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
 // RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null
+// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=allow-cloning=true \
+// RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck --check-prefix=CLONES %s
 
 // RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file --verify-diagnostics > /dev/null
 
@@ -55,6 +57,8 @@ func.func @nested_regions_and_cond_branch(
 //  CHECK-NEXT:   bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
 //       CHECK:   return
 
+// CLONES-LABEL: func @nested_regions_and_cond_branch
+
 // -----
 
 // Test Case: nested region control flow
@@ -85,13 +89,23 @@ func.func @nested_region_control_flow(
 //       CHECK:     bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
 //   CHECK-NOT: retain
 //       CHECK:     scf.yield [[ALLOC]], %false
-//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:     scf.yield [[V0]]#0
-//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:     scf.yield [[CLONE]]
-//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK:   return [[V1]]
+//       CHECK:   return [[V0]]#0
+
+// CLONES-LABEL: func @nested_region_control_flow
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc(
+//       CLONES:   [[V0:%.+]]:2 = scf.if
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:     [[ALLOC1:%.+]] = memref.alloc(
+//       CLONES:     bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
+//   CLONES-NOT: retain
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:   [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:     scf.yield [[V0]]#0
+//       CLONES:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:     scf.yield [[CLONE]]
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES:   bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES:   return [[V1]]
 
 // -----
 
@@ -120,13 +134,22 @@ func.func @nested_region_control_flow_div(
 //       CHECK:     scf.yield [[ALLOC]], %false
 //       CHECK:     [[ALLOC1:%.+]] = memref.alloc(
 //       CHECK:     scf.yield [[ALLOC1]], %true
-//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:     scf.yield [[V0]]#0
-//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:     scf.yield [[CLONE]]
-//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK:   return [[V1]]
+//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}}) retain ([[V0]]#0 :
+//       CHECK:   return [[V0]]#0
+
+// CLONES-LABEL: func @nested_region_control_flow_div
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc(
+//       CLONES:   [[V0:%.+]]:2 = scf.if
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:     [[ALLOC1:%.+]] = memref.alloc(
+//       CLONES:     scf.yield [[ALLOC1]], %true
+//       CLONES:   [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:     scf.yield [[V0]]#0
+//       CLONES:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:     scf.yield [[CLONE]]
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES:   return [[V1]]
 
 // -----
 
@@ -158,13 +181,25 @@ func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
 //       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
 //       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
 //       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
-//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:     scf.yield [[V0]]#0
-//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:     scf.yield [[CLONE]]
-//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK:   return [[V1]]
+//   CHECK-NOT:   bufferization.dealloc
+//       CHECK:   return [[V0]]#0
+
+// CLONES-LABEL: func.func @inner_region_control_flow
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc(
+//       CLONES:   [[V0:%.+]]:2 = test.region_if [[ALLOC]], %false
+//       CLONES:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
+//       CLONES:     test.region_if_yield [[ARG1]], [[ARG2]]
+//       CLONES:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
+//       CLONES:     test.region_if_yield [[ARG1]], [[ARG2]]
+//       CLONES:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
+//       CLONES:     test.region_if_yield [[ARG1]], [[ARG2]]
+//       CLONES:   [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:     scf.yield [[V0]]#0
+//       CLONES:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:     scf.yield [[CLONE]]
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES:   return [[V1]]
 
 // -----
 
@@ -209,6 +244,8 @@ func.func @nestedRegionsAndCondBranchAlloca(
 //       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[A0]]
 //       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND]])
 
+// CLONES-LABEL: func @nestedRegionsAndCondBranchAlloca
+
 // -----
 
 func.func @nestedRegionControlFlowAlloca(
@@ -232,13 +269,22 @@ func.func @nestedRegionControlFlowAlloca(
 //       CHECK:   scf.yield [[ALLOC]], %false
 //       CHECK:   memref.alloca(
 //       CHECK:   scf.yield [[ALLOC]], %false
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK: return [[V1]]
+//   CHECK-NOT: bufferization.dealloc
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @nestedRegionControlFlowAlloca
+//       CLONES: [[ALLOC:%.+]] = memref.alloc(
+//       CLONES: [[V0:%.+]]:2 = scf.if
+//       CLONES:   scf.yield [[ALLOC]], %false
+//       CLONES:   memref.alloca(
+//       CLONES:   scf.yield [[ALLOC]], %false
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES: return [[V1]]
 
 // -----
 
@@ -279,6 +325,8 @@ func.func @loop_alloc(
 //       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)
 //   CHECK-NOT: retain
 
+// CLONES-LABEL: func @loop_alloc
+
 // -----
 
 // Test Case: structured control-flow loop with a nested if operation.
@@ -326,6 +374,8 @@ func.func @loop_nested_if_no_alloc(
 // TODO: we know statically that the inner dealloc will never deallocate
 //       anything, i.e., we can optimize it away
 
+// CLONES-LABEL: func @loop_nested_if_no_alloc
+
 // -----
 
 // Test Case: structured control-flow loop with a nested if operation using
@@ -364,13 +414,29 @@ func.func @loop_nested_if_alloc(
 //       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
 //       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
 //       CHECK: }
-//       CHECK: [[V2:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
-//       CHECK: return [[V2]]
+//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}}) retain ([[V0]]#0 :
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @loop_nested_if_alloc
+//  CLONES-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>)
+//       CLONES: [[ALLOC:%.+]] = memref.alloc()
+//       CLONES: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG5:%.+]] = [[ARG3]], [[ARG6:%.+]] = %false
+//       CLONES:   [[V1:%.+]]:2 = scf.if
+//       CLONES:     [[ALLOC1:%.+]] = memref.alloc()
+//       CLONES:     scf.yield [[ALLOC1]], %true
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG5]]
+//       CLONES:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG6]]) retain ([[V1]]#0 :
+//       CLONES:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
+//       CLONES:   scf.yield [[V1]]#0, [[OWN_AGG]]
+//       CLONES: }
+//       CLONES: [[V2:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
+//       CLONES: return [[V2]]
 
 // -----
 
@@ -447,6 +513,8 @@ func.func @loop_nested_alloc(
 
 // TODO: all the retain operands could be removed by doing some more thorough analysis
 
+// CLONES-LABEL: func @loop_nested_alloc
+
 // -----
 
 func.func @affine_loop() -> f32 {
@@ -466,6 +534,8 @@ func.func @affine_loop() -> f32 {
 //       CHECK:   affine.yield
 //       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
 
+// CLONES-LABEL: func @affine_loop
+
 // -----
 
 func.func @assumingOp(
@@ -508,6 +578,8 @@ func.func @assumingOp(
 //   CHECK-NOT: retain
 //       CHECK: return
 
+// CLONES-LABEL: func @assumingOp
+
 // -----
 
 // Test Case: The op "test.bar" does not implement the RegionBranchOpInterface.
@@ -570,6 +642,8 @@ func.func @while_two_arg(%arg0: index) {
 //       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
 //       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#2, [[V0]]#3)
 
+// CLONES-LABEL: func @while_two_arg
+
 // -----
 
 func.func @while_three_arg(%arg0: index) {
@@ -606,6 +680,8 @@ func.func @while_three_arg(%arg0: index) {
 
 // TODO: better alias analysis could simplify the dealloc inside the body further
 
+// CLONES-LABEL: func @while_three_arg
+
 // -----
 
 // Memref allocated in `then` region and passed back to the parent if op.
@@ -626,17 +702,25 @@ func.func @test_affine_if_1(%arg0: memref<10xf32>) -> memref<10xf32> {
 //       CHECK:   [[ALLOC:%.+]] = memref.alloc()
 //       CHECK:   affine.yield [[ALLOC]], %true
 //       CHECK:   affine.yield [[ARG0]], %false
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
-//       CHECK: return [[V1]]
+//       CHECK: return [[V0]]#0
 
 // TODO: the dealloc could be optimized away since the memref to be deallocated
 //       either aliases with V1 or the condition is false
 
+// CLONES-LABEL: func @test_affine_if_1
+//  CLONES-SAME: ([[ARG0:%.*]]: memref<10xf32>)
+//       CLONES: [[V0:%.+]]:2 = affine.if
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc()
+//       CLONES:   affine.yield [[ALLOC]], %true
+//       CLONES:   affine.yield [[ARG0]], %false
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
+//       CLONES: return [[V1]]
+
 // -----
 
 // Memref allocated before parent IfOp and used in `then` region.
@@ -652,19 +736,28 @@ func.func @test_affine_if_2() -> memref<10xf32> {
   }
   return %0 : memref<10xf32>
 }
+
 // CHECK-LABEL: func @test_affine_if_2
 //       CHECK: [[ALLOC:%.+]] = memref.alloc()
 //       CHECK: [[V0:%.+]]:2 = affine.if
 //       CHECK:   affine.yield [[ALLOC]], %false
 //       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
 //       CHECK:   affine.yield [[ALLOC1]], %true
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK: return [[V1]]
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @test_affine_if_2
+//       CLONES: [[ALLOC:%.+]] = memref.alloc()
+//       CLONES: [[V0:%.+]]:2 = affine.if
+//       CLONES:   affine.yield [[ALLOC]], %false
+//       CLONES:   [[ALLOC1:%.+]] = memref.alloc()
+//       CLONES:   affine.yield [[ALLOC1]], %true
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES: return [[V1]]
 
 // -----
 
@@ -688,10 +781,18 @@ func.func @test_affine_if_3() -> memref<10xf32> {
 //       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
 //       CHECK:   affine.yield [[ALLOC1]], %true
 //       CHECK:   affine.yield [[ALLOC]], %false
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
-//       CHECK: return [[V1]]
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @test_affine_if_3
+//       CLONES: [[ALLOC:%.+]] = memref.alloc()
+//       CLONES: [[V0:%.+]]:2 = affine.if
+//       CLONES:   [[ALLOC1:%.+]] = memref.alloc()
+//       CLONES:   affine.yield [[ALLOC1]], %true
+//       CLONES:   affine.yield [[ALLOC]], %false
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
+//       CLONES: return [[V1]]
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
index 43e423d4c3e8e14..768fde5d6dcb992 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline=allow-cloning=true -convert-bufferization-to-memref \
 // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \
 // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
index a101b76ef186b5e..0db04eb68efa416 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline=allow-cloning=true -convert-bufferization-to-memref \
 // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \
 // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
index 38b49cd444df3c1..521e669e890c84a 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -arith-bufferize -linalg-bufferize \
-// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \
+// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline=allow-cloning=true -convert-bufferization-to-memref -convert-linalg-to-loops \
 // RUN: -convert-arith-to-llvm -convert-scf-to-cf -convert-cf-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 5d1574162aa690c..e96d17976186862 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -12223,6 +12223,7 @@ cc_library(
     hdrs = ["include/mlir/Dialect/Bufferization/Pipelines/Passes.h"],
     includes = ["include"],
     deps = [
+        ":BufferizationDialect",
         ":BufferizationToMemRef",
         ":BufferizationTransforms",
         ":FuncDialect",

>From 5563e5414b068e3aaa0654554f446193b2efc339 Mon Sep 17 00:00:00 2001
From: Martin Erhart <merhart at google.com>
Date: Mon, 18 Sep 2023 15:26:47 +0000
Subject: [PATCH 2/2] [mlir][bufferization] BufferDeallocation: support
 unstructured control flow loops

This commit adds support for any kind of unstructured control flow loops.
---
 .../OwnershipBasedBufferDeallocation.cpp      | 106 -----
 .../dealloc-unstructured-cf-loops.mlir        | 404 ++++++++++++++++++
 .../invalid-buffer-deallocation.mlir          |  66 ---
 3 files changed, 404 insertions(+), 172 deletions(-)
 create mode 100644 mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-unstructured-cf-loops.mlir

diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
index bc76d6fabfc3f22..c022d47199200ed 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
@@ -47,89 +47,6 @@ static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
 
 static bool isMemref(Value v) { return v.getType().isa<BaseMemRefType>(); }
 
-//===----------------------------------------------------------------------===//
-// Backedges analysis
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-/// A straight-forward program analysis which detects loop backedges induced by
-/// explicit control flow.
-class Backedges {
-public:
-  using BlockSetT = SmallPtrSet<Block *, 16>;
-  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
-
-public:
-  /// Constructs a new backedges analysis using the op provided.
-  Backedges(Operation *op) { recurse(op); }
-
-  /// Returns the number of backedges formed by explicit control flow.
-  size_t size() const { return edgeSet.size(); }
-
-  /// Returns the start iterator to loop over all backedges.
-  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
-
-  /// Returns the end iterator to loop over all backedges.
-  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
-
-private:
-  /// Enters the current block and inserts a backedge into the `edgeSet` if we
-  /// have already visited the current block. The inserted edge links the given
-  /// `predecessor` with the `current` block.
-  bool enter(Block &current, Block *predecessor) {
-    bool inserted = visited.insert(&current).second;
-    if (!inserted)
-      edgeSet.insert(std::make_pair(predecessor, &current));
-    return inserted;
-  }
-
-  /// Leaves the current block.
-  void exit(Block &current) { visited.erase(&current); }
-
-  /// Recurses into the given operation while taking all attached regions into
-  /// account.
-  void recurse(Operation *op) {
-    Block *current = op->getBlock();
-    // If the current op implements the `BranchOpInterface`, there can be
-    // cycles in the scope of all successor blocks.
-    if (isa<BranchOpInterface>(op)) {
-      for (Block *succ : current->getSuccessors())
-        recurse(*succ, current);
-    }
-    // Recurse into all distinct regions and check for explicit control-flow
-    // loops.
-    for (Region &region : op->getRegions()) {
-      if (!region.empty())
-        recurse(region.front(), current);
-    }
-  }
-
-  /// Recurses into explicit control-flow structures that are given by
-  /// the successor relation defined on the block level.
-  void recurse(Block &block, Block *predecessor) {
-    // Try to enter the current block. If this is not possible, we are
-    // currently processing this block and can safely return here.
-    if (!enter(block, predecessor))
-      return;
-
-    // Recurse into all operations and successor blocks.
-    for (Operation &op : block.getOperations())
-      recurse(&op);
-
-    // Leave the current block.
-    exit(block);
-  }
-
-  /// Stores all blocks that are currently visited and on the processing stack.
-  BlockSetT visited;
-
-  /// Stores all backedges in the format (source, target).
-  BackedgeSetT edgeSet;
-};
-
-} // namespace
-
 //===----------------------------------------------------------------------===//
 // BufferDeallocation
 //===----------------------------------------------------------------------===//
@@ -393,12 +310,6 @@ class BufferDeallocation {
   FailureOr<std::pair<Value, Value>>
   materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
 
-  /// Checks all the preconditions for operations implementing the
-  /// FunctionOpInterface that have to hold for the deallocation to be
-  /// applicable:
-  /// (1) Checks that there are not explicit control flow loops.
-  static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);
-
   /// Checks all the preconditions for operations inside the region of
   /// operations implementing the FunctionOpInterface that have to hold for the
   /// deallocation to be applicable:
@@ -475,19 +386,6 @@ static bool regionOperatesOnMemrefValues(Region &region) {
   return result.wasInterrupted();
 }
 
-LogicalResult
-BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
-  // (1) Ensure that there are supported loops only (no explicit control flow
-  // loops).
-  Backedges backedges(op);
-  if (backedges.size()) {
-    op->emitError("Only structured control-flow loops are supported.");
-    return failure();
-  }
-
-  return success();
-}
-
 LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
   // (1) Check that the control flow structures are supported.
   auto regions = op->getRegions();
@@ -570,10 +468,6 @@ BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
 }
 
 LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
-  // Stop and emit a proper error message if we don't support the input IR.
-  if (failed(verifyFunctionPreconditions(op)))
-    return failure();
-
   // Process the function block by block.
   auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
       [&](Block *block) {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-unstructured-cf-loops.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-unstructured-cf-loops.mlir
new file mode 100644
index 000000000000000..36400d11e74771b
--- /dev/null
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-unstructured-cf-loops.mlir
@@ -0,0 +1,404 @@
+// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation \
+// RUN:  -buffer-deallocation-simplification -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null
+
+// RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file > /dev/null
+
+func.func private @infinite_loop(%arg0: memref<2xi32>) -> memref<2xi32> {
+  cf.br ^body(%arg0 : memref<2xi32>)
+^body(%arg1: memref<2xi32>):
+  %alloc = memref.alloc() : memref<2xi32>
+  cf.br ^body(%alloc : memref<2xi32>)
+}
+
+// CHECK-LABEL: func private @infinite_loop
+//  CHECK-SAME: ([[ARG0:%.+]]: memref<2xi32>)
+//       CHECK:   cf.br [[BODY:\^.+]]([[ARG0]], %false
+//       CHECK: [[BODY]]([[M0:%.+]]: memref<2xi32>, [[OWN:%.+]]: i1):
+//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
+// TODO: this extract_strided_metadata could be optimized away
+//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+//       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[OWN]])
+//       CHECK:   cf.br [[BODY]]([[ALLOC]], %true
+
+// -----
+
+func.func private @simple_for_loop(
+    %arg0: index, %arg1: index, %arg2: index,
+    %arg3: memref<2xf32>, %arg4: memref<2xf32>
+  ) {
+  %alloc = memref.alloc() : memref<2xf32>
+  "test.memref_user"(%alloc) : (memref<2xf32>) -> ()
+  cf.br ^check(%arg0, %arg3 : index, memref<2xf32>)
+^check(%0: index, %1: memref<2xf32>):  // 2 preds: ^bb0, ^body
+  %2 = arith.cmpi slt, %0, %arg1 : index
+  cf.cond_br %2, ^body, ^exit
+^body:  // pred: ^check
+  %3 = arith.cmpi eq, %0, %arg1 : index
+  %alloc_0 = memref.alloc() : memref<2xf32>
+  %4 = arith.addi %0, %arg2 : index
+  cf.br ^check(%4, %alloc_0 : index, memref<2xf32>)
+^exit:  // pred: ^check
+  test.copy(%1, %arg4) : (memref<2xf32>, memref<2xf32>)
+  return
+}
+
+// CHECK-LABEL: func private @simple_for_loop
+//  CHECK-SAME: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: index, [[ARG2:%.+]]: index, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
+//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
+//       CHECK:   test.memref_user
+//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
+//       CHECK:   cf.br [[CHECK:\^.+]]([[ARG0]], [[ARG3]], %false
+//       CHECK: [[CHECK]]({{.*}}: index, [[M0:%.+]]: memref<2xf32>, [[OWN:%.+]]: i1):
+//       CHECK:   [[COND:%.+]] = arith.cmpi slt
+//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+//       CHECK:   [[THEN_OWN:%.+]] = arith.andi [[OWN]], [[COND]]
+// TODO: the following dealloc could be simplified such that it doesn't have a retain
+//       CHECK:   bufferization.dealloc ([[BASE0]] :{{.*}}) if ([[THEN_OWN]]) retain ([[ARG4]] :
+//       CHECK:   [[NEG_COND:%.+]] = arith.xori [[COND]], %true
+//       CHECK:   [[ELSE_OWN:%.+]] = arith.andi [[OWN]], [[NEG_COND]]
+// TODO: the following dealloc could be completely optimized away
+//       CHECK:   [[ELSE_UPDATED_OWN:%.+]]:2 = bufferization.dealloc ([[BASE0]] :{{.*}}) if ([[ELSE_OWN]]) retain ([[M0]], [[ARG4]] :
+//       CHECK:   cf.cond_br [[COND]], [[BODY:\^.+]], [[EXIT:\^.+]]
+//       CHECK: [[BODY]]:
+//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
+//       CHECK:   cf.br [[CHECK]]({{.*}}, [[ALLOC1]], %true
+//       CHECK: [[EXIT]]:
+//       CHECK:   test.copy
+//       CHECK:   [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+//       CHECK:   bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[ELSE_UPDATED_OWN]]#0)
+
+// -----
+
+func.func private @loop_nested_if_no_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
+  %alloc = memref.alloc() : memref<2xf32>
+  cf.br ^loop_check(%arg0, %arg3 : index, memref<2xf32>)
+^loop_check(%0: index, %1: memref<2xf32>):  // 2 preds: ^bb0, ^body
+  %2 = arith.cmpi slt, %0, %arg1 : index
+  cf.cond_br %2, ^if_check, ^exit
+^if_check:  // pred: ^loop_check
+  %3 = arith.cmpi eq, %0, %arg1 : index
+  cf.cond_br %3, ^then, ^else
+^then:  // pred: ^if_check
+  cf.br ^join(%alloc : memref<2xf32>)
+^else:  // pred: ^if_check
+  cf.br ^join(%1 : memref<2xf32>)
+^join(%4: memref<2xf32>):  // 2 preds: ^then, ^else
+  cf.br ^body
+^body:  // pred: ^join
+  %5 = arith.addi %0, %arg2 : index
+  cf.br ^loop_check(%5, %4 : index, memref<2xf32>)
+^exit:  // pred: ^loop_check
+  test.copy(%1, %arg4) : (memref<2xf32>, memref<2xf32>)
+  return
+}
+
+// CHECK-LABEL: func private @loop_nested_if_no_alloc
+//  CHECK-SAME: ([[ARG0:%.+]]: index, [[ARG1:%.+]]: index, [[ARG2:%.+]]: index, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
+//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
+//       CHECK:   cf.br [[LOOP_CHECK:\^.+]]([[ARG0]], [[ARG3]], %false
+//       CHECK: [[LOOP_CHECK]]({{.*}}: index, [[M0:%.+]]: memref<2xf32>, [[OWN:%.+]]: i1):
+//       CHECK:   [[OUTER_COND:%.+]] = arith.cmpi slt
+//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+//       CHECK:   [[THEN_OWN:%.+]] = arith.andi [[OWN]], [[OUTER_COND]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[THEN_UPDATED_OWN:%.+]]:3 = bufferization.dealloc ([[ALLOC]], [[BASE0]] :{{.*}}) if ([[OUTER_COND]], [[THEN_OWN]]) retain ([[M0]], [[ARG4]], [[ALLOC]] :
+//       CHECK:   [[NEG_OUTER_COND0:%.+]] = arith.xori [[OUTER_COND]], %true
+//       CHECK:   [[NEG_OUTER_COND1:%.+]] = arith.xori [[OUTER_COND]], %true
+//       CHECK:   [[ELSE_OWN:%.+]] = arith.andi [[OWN]], [[NEG_OUTER_COND1]]
+// TODO: this dealloc can be simplified such that it only deallocates [[ALLOC]] without any retained values in the list
+//       CHECK:   [[ELSE_UPDATED_OWN:%.+]]:2 = bufferization.dealloc ([[ALLOC]], [[BASE0]] :{{.*}}) if ([[NEG_OUTER_COND0]], [[ELSE_OWN]]) retain ([[M0]], [[ARG4]] :
+//       CHECK:   [[NEW_ALLOC_OWN:%.+]] = arith.select [[OUTER_COND]], [[THEN_UPDATED_OWN]]#0, [[ELSE_UPDATED_OWN]]#0
+//       CHECK:   cf.cond_br [[OUTER_COND]], [[IF_CHECK:\^.+]], [[EXIT:\^.+]]
+//       CHECK: [[IF_CHECK]]:
+//       CHECK:   [[INNER_COND:%.+]] = arith.cmpi eq
+//       CHECK:   [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+//       CHECK:   [[THEN_OWN:%.+]] = arith.andi [[NEW_ALLOC_OWN]], [[INNER_COND]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[THEN_OWN]]) retain ([[ARG4]], [[ALLOC]] :
+//       CHECK:   [[NEG_INNER_COND0:%.+]] = arith.xori [[INNER_COND]], %true
+//       CHECK:   [[ELSE_OWN:%.+]] = arith.andi [[NEW_ALLOC_OWN]], [[NEG_INNER_COND0]]
+//       CHECK:   [[NEG_INNER_COND1:%.+]] = arith.xori [[INNER_COND]], %true
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   bufferization.dealloc ([[BASE1]], [[ALLOC]] :{{.*}}) if ([[ELSE_OWN]], [[NEG_INNER_COND1]]) retain ([[M0]], [[ARG4]], [[ALLOC]] :
+//       CHECK:   cf.cond_br [[INNER_COND]], [[THEN:\^.+]], [[ELSE:\^.+]]
+//       CHECK: [[THEN]]:
+//       CHECK:   cf.br [[JOIN:\^.+]]([[ALLOC]], %true
+//       CHECK: [[ELSE]]:
+//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN:%.+]]:3 = bufferization.dealloc ([[BASE2]], [[ALLOC]] :{{.*}}) if ([[NEW_ALLOC_OWN]], %true{{[0-9_]*}}) retain ([[M0]], [[ARG4]], [[ALLOC]] :
+//       CHECK:   cf.br [[JOIN]]([[M0]], [[UPDATED_OWN]]#0
+//       CHECK: [[JOIN]]([[A0:%.+]]: memref<2xf32>, [[C0:%.+]]: i1):
+//       CHECK:   [[BASE3:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN1:%.+]]:3 = bufferization.dealloc ([[ALLOC]], [[BASE3]] :{{.*}}) if (%true{{[0-9_]*}}, [[C0]]) retain ([[A0]], [[ARG4]], [[ALLOC]] :
+//       CHECK:   cf.br [[BODY:\^.+]]
+//       CHECK: [[BODY]]:
+//       CHECK:   [[BASE4:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN2:%.+]]:3 = bufferization.dealloc ([[BASE4]], [[ALLOC]] :{{.*}}) if ([[UPDATED_OWN1]]#0, %true{{[0-9_]*}}) retain ([[A0]], [[ARG4]], [[ALLOC]] :
+//       CHECK:   cf.br [[LOOP_CHECK]]({{.*}}, [[A0]], [[UPDATED_OWN2]]#0
+//       CHECK: [[EXIT]]:
+//       CHECK:   test.copy
+//       CHECK:   [[BASE5:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[M0]]
+//       CHECK:   bufferization.dealloc ([[BASE5]] :{{.*}}) if ([[NEW_ALLOC_OWN]])
+
+// -----
+
+func.func private @loop_nested_if_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>) -> memref<2xf32> {
+  %alloc = memref.alloc() : memref<2xf32>
+  cf.br ^loop_check(%arg0, %arg3 : index, memref<2xf32>)
+^loop_check(%0: index, %1: memref<2xf32>):  // 2 preds: ^bb0, ^body
+  %2 = arith.cmpi slt, %0, %arg1 : index
+  cf.cond_br %2, ^if_check, ^exit
+^if_check:  // pred: ^loop_check
+  %3 = arith.cmpi eq, %0, %arg1 : index
+  cf.cond_br %3, ^then, ^else
+^then:  // pred: ^if_check
+  %alloc_0 = memref.alloc() : memref<2xf32>
+  cf.br ^join(%alloc_0 : memref<2xf32>)
+^else:  // pred: ^if_check
+  cf.br ^join(%alloc : memref<2xf32>)
+^join(%4: memref<2xf32>):  // 2 preds: ^then, ^else
+  cf.br ^body
+^body:  // pred: ^join
+  %5 = arith.addi %0, %arg2 : index
+  cf.br ^loop_check(%5, %4 : index, memref<2xf32>)
+^exit:  // pred: ^loop_check
+  return %1 : memref<2xf32>
+}
+
+// CHECK-LABEL: func private @loop_nested_if_alloc
+//  CHECK-SAME: ({{.*}}: index, {{.*}}: index, {{.*}}: index, [[ARG3:%.+]]: memref<2xf32>)
+//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
+//   CHECK-NOT:   bufferization.dealloc
+//       CHECK:   cf.br [[LOOP_CHECK:\^.+]]({{.*}}, [[ARG3]], %false
+//       CHECK: [[LOOP_CHECK]]({{.*}}: index, [[A0:%.+]]: memref<2xf32>, [[C0:%.+]]: i1):
+//       CHECK:   [[COND:%.+]] = arith.cmpi slt
+//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+//       CHECK:   [[THEN_COND:%.+]] = arith.andi [[C0]], [[COND]]
+//       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[THEN_COND]]) retain ([[ALLOC]] :
+// TODO: an optimization pass could move this deallocation into the "exit" block
+//       CHECK:   [[ELSE_COND:%.+]] = arith.xori [[COND]], %true
+//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if ([[ELSE_COND]]) retain ([[A0]] :
+//       CHECK:   cf.cond_br
+//       CHECK: ^{{.*}}:
+//   CHECK-NOT:   bufferization.dealloc
+//       CHECK:   cf.cond_br
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[ALLOC0:%.+]] = memref.alloc(
+//   CHECK-NOT:   bufferization.dealloc
+//       CHECK:   cf.br [[JOIN:\^.+]]([[ALLOC0]], %true
+//       CHECK: ^{{.*}}:
+//   CHECK-NOT:   bufferization.dealloc
+//       CHECK:   cf.br [[JOIN]]([[ALLOC]], %true
+//       CHECK: [[JOIN]]([[A1:%.+]]: memref<2xf32>, [[C1:%.+]]: i1):
+//       CHECK:   [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN:%.+]]:2 = bufferization.dealloc ([[ALLOC]], [[BASE1]] :{{.*}}) if (%true{{[0-9_]*}}, [[C1]]) retain ([[A1]], [[ALLOC]] :
+//       CHECK:   cf.br
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN1:%.+]]:2 = bufferization.dealloc ([[BASE2]], [[ALLOC]] :{{.*}}) if ([[UPDATED_OWN]]#0, %true{{[0-9_]*}}) retain ([[A1]], [[ALLOC]] :
+//       CHECK:   cf.br ^bb1({{.*}}, [[A1]], [[UPDATED_OWN1]]#0
+//       CHECK: ^{{.*}}:
+//   CHECK-NOT:   bufferization.dealloc
+
+// -----
+
+func.func private @nested_loop_with_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
+  %alloc = memref.alloc() : memref<2xf32>
+  "test.memref_user"(%alloc) : (memref<2xf32>) -> ()
+  cf.br ^outer_loop_check(%arg0, %arg3 : index, memref<2xf32>)
+^outer_loop_check(%0: index, %1: memref<2xf32>):  // 2 preds: ^bb0, ^outer_body
+  %2 = arith.cmpi slt, %0, %arg1 : index
+  cf.cond_br %2, ^outer_body_entry, ^exit
+^outer_body_entry:  // pred: ^outer_loop_check
+  cf.br ^middle_loop_check(%arg0, %1 : index, memref<2xf32>)
+^middle_loop_check(%3: index, %4: memref<2xf32>):  // 2 preds: ^outer_body_entry, ^middle_body
+  %5 = arith.cmpi slt, %3, %arg1 : index
+  cf.cond_br %5, ^middle_body_entry, ^outer_body
+^middle_body_entry:  // pred: ^middle_loop_check
+  cf.br ^inner_loop_check(%arg0, %4 : index, memref<2xf32>)
+^inner_loop_check(%6: index, %7: memref<2xf32>):  // 2 preds: ^middle_body_entry, ^inner_body
+  %8 = arith.cmpi slt, %6, %arg1 : index
+  cf.cond_br %8, ^if_check, ^middle_body
+^if_check:  // pred: ^inner_loop_check
+  %alloc_0 = memref.alloc() : memref<2xf32>
+  "test.memref_user"(%alloc_0) : (memref<2xf32>) -> ()
+  %9 = arith.cmpi eq, %0, %arg1 : index
+  cf.cond_br %9, ^then, ^else
+^then:  // pred: ^if_check
+  %alloc_1 = memref.alloc() : memref<2xf32>
+  cf.br ^join(%alloc_1 : memref<2xf32>)
+^else:  // pred: ^if_check
+  cf.br ^join(%7 : memref<2xf32>)
+^join(%10: memref<2xf32>):  // 2 preds: ^then, ^else
+  cf.br ^inner_body
+^inner_body:  // pred: ^join
+  %11 = arith.addi %6, %arg2 : index
+  cf.br ^inner_loop_check(%11, %10 : index, memref<2xf32>)
+^middle_body:  // pred: ^inner_loop_check
+  %12 = arith.addi %3, %arg2 : index
+  cf.br ^middle_loop_check(%12, %7 : index, memref<2xf32>)
+^outer_body:  // pred: ^middle_loop_check
+  %13 = arith.addi %0, %arg2 : index
+  cf.br ^outer_loop_check(%13, %4 : index, memref<2xf32>)
+^exit:  // pred: ^outer_loop_check
+  test.copy(%1, %arg4) : (memref<2xf32>, memref<2xf32>)
+  return
+}
+
+// CHECK-LABEL: func.func private @nested_loop_with_alloc
+//  CHECK-SAME: ({{.*}}: index, {{.*}}: index, {{.*}}: index, [[ARG3:%.+]]: memref<2xf32>, [[ARG4:%.+]]: memref<2xf32>)
+//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
+//       CHECK:   test.memref_user
+//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
+//       CHECK:   cf.br [[OUTER_LOOP_CHECK:\^.+]]({{.*}}, [[ARG3]], %false
+//       CHECK: [[OUTER_LOOP_CHECK]]({{.*}}: index, [[A0:%.+]]: memref<2xf32>, [[C0:%.+]]: i1):
+//       CHECK:   [[COND:%.+]] = arith.cmpi slt
+//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+//       CHECK:   [[THEN_OWN:%.+]] = arith.andi [[C0]], [[COND]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_THEN_OWN:%.+]]:2 = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[THEN_OWN]]) retain ([[A0]], [[ARG4]] :
+//       CHECK:   [[NEG_COND:%.+]] = arith.xori [[COND]], %true
+//       CHECK:   [[ELSE_OWN:%.+]] = arith.andi [[C0]], [[NEG_COND]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_ELSE_OWN:%.+]]:2 = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ELSE_OWN]]) retain ([[A0]], [[ARG4]] :
+//       CHECK:   [[UPDATED_OWN:%.+]] = arith.select [[COND]], [[UPDATED_THEN_OWN]]#0, [[UPDATED_ELSE_OWN]]#0
+//       CHECK:   cf.cond_br{{.*}}[[OUTER_BODY_ENTRY:\^.+]], [[EXIT:\^.+]]
+//       CHECK: [[OUTER_BODY_ENTRY]]:
+//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_0:%.+]]:2 = bufferization.dealloc ([[BASE0]] :{{.*}}) if ([[UPDATED_OWN]]) retain ([[A0]], [[ARG4]] :
+//       CHECK:   cf.br [[MIDDLE_LOOP_CHECK:\^.+]]({{.*}}, [[A0]], [[UPDATED_OWN_0]]#0 :
+//       CHECK: [[MIDDLE_LOOP_CHECK]]({{.*}}: index, [[A1:%.+]]: memref<2xf32>, [[C1:%.+]]: i1):
+//       CHECK:   [[COND0:%.+]] = arith.cmpi slt
+//       CHECK:   [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
+//       CHECK:   [[THEN_OWN_0:%.+]] = arith.andi [[C1]], [[COND0]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_THEN_OWN_0:%.+]]:2 = bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[THEN_OWN_0]]) retain ([[A1]], [[ARG4]] :
+//       CHECK:   [[NEG_COND0:%.+]] = arith.xori [[COND0]], %true
+//       CHECK:   [[ELSE_OWN_0:%.+]] = arith.andi [[C1]], [[NEG_COND0]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_ELSE_OWN_0:%.+]]:2 = bufferization.dealloc ([[BASE1]] :{{.*}}) if ([[ELSE_OWN_0]]) retain ([[A1]], [[ARG4]] :
+//       CHECK:   [[UPDATED_OWN_1:%.+]] = arith.select [[COND0]], [[UPDATED_THEN_OWN_0]]#0, [[UPDATED_ELSE_OWN_0]]#0
+//       CHECK:   cf.cond_br
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_2:%.+]]:2 = bufferization.dealloc ([[BASE2]] :{{.*}}) if ([[UPDATED_OWN_1]]) retain ([[A1]], [[ARG4]] :
+//       CHECK:   cf.br [[IF_CHECK:\^.+]](%arg0, [[A1]], [[UPDATED_OWN_2]]#0
+//       CHECK: [[IF_CHECK]]({{.*}}: index, [[A2:%.+]]: memref<2xf32>, [[C2:%.+]]: i1):
+//       CHECK:   [[COND1:%.+]] = arith.cmpi slt
+//       CHECK:   [[BASE3:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A2]]
+//       CHECK:   [[THEN_OWN_1:%.+]] = arith.andi [[C2]], [[COND1]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_THEN_OWN_1:%.+]]:2 = bufferization.dealloc ([[BASE3]] :{{.*}}) if ([[THEN_OWN_1]]) retain ([[A2]], [[ARG4]] :
+//       CHECK:   [[NEG_COND1:%.+]] = arith.xori [[COND1]], %true
+//       CHECK:   [[ELSE_OWN_1:%.+]] = arith.andi [[C2]], [[NEG_COND1]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_ELSE_OWN_1:%.+]]:2 = bufferization.dealloc ([[BASE3]] :{{.*}}) if ([[ELSE_OWN_1]]) retain ([[A2]], [[ARG4]] :
+//       CHECK:   [[UPDATED_OWN_3:%.+]] = arith.select [[COND1]], [[UPDATED_THEN_OWN_1]]#0, [[UPDATED_ELSE_OWN_1]]#0
+//       CHECK:   cf.cond_br
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
+//       CHECK:   test.memref_user
+//       CHECK:   [[COND2:%.+]] = arith.cmpi eq
+//       CHECK:   [[BASE4:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A2]]
+//       CHECK:   [[THEN_OWN_2:%.+]] = arith.andi [[UPDATED_OWN_3]], [[COND2]]
+// NOTE: the retained memref can only be optimized away if the two memref function arguments are marked as restrict (guaranteed to not alias)
+//       CHECK:   bufferization.dealloc ([[BASE4]] :{{.*}}) if ([[THEN_OWN_2]]) retain ([[ARG4]] :
+// TODO: this dealloc could be merged with the one below by taking the disjunction of the conditions which would fold to 'true' and thus the dealloc would become unconditional
+//       CHECK:   bufferization.dealloc ([[ALLOC1]] :{{.*}}) if ([[COND2]])
+//       CHECK:   [[NEG_COND2:%.+]] = arith.xori [[COND2]], %true
+//       CHECK:   [[ELSE_OWN_2:%.+]] = arith.andi [[UPDATED_OWN_3]], [[NEG_COND2]]
+//       CHECK:   [[NEG_COND2_1:%.+]] = arith.xori [[COND2]], %true
+//       CHECK:   bufferization.dealloc ([[ALLOC1]] :{{.*}}) if ([[NEG_COND2_1]])
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   bufferization.dealloc ([[BASE4]] :{{.*}}) if ([[ELSE_OWN_2]]) retain ([[A2]], [[ARG4]] :
+//       CHECK:   cf.cond_br
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[ALLOC2:%.+]] = memref.alloc(
+//       CHECK:   cf.br [[JOIN:\^.+]]([[ALLOC2]], %true
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[BASE5:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A2]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_4:%.+]]:2 = bufferization.dealloc ([[BASE5]] :{{.*}}) if ([[UPDATED_OWN_3]]) retain ([[A2]], [[ARG4]] :
+//       CHECK:   cf.br [[JOIN]]([[A2]], [[UPDATED_OWN_4]]#0
+//       CHECK: [[JOIN]]([[A3:%.+]]: memref<2xf32>, [[C3:%.+]]: i1):
+//       CHECK:   [[BASE6:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A3]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_5:%.+]]:2 = bufferization.dealloc ([[BASE6]] :{{.*}}) if ([[C3]]) retain ([[A3]], [[ARG4]] :
+//       CHECK:   cf.br
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[BASE7:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A3]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_6:%.+]]:2 = bufferization.dealloc ([[BASE7]] :{{.*}}) if ([[UPDATED_OWN_5]]#0) retain ([[A3]], [[ARG4]] :
+//       CHECK:   cf.br ^bb5({{.*}}, [[A3]], [[UPDATED_OWN_6]]#0
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[BASE8:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A2]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_7:%.+]]:2 = bufferization.dealloc ([[BASE8]] :{{.*}}) if ([[UPDATED_OWN_3]]) retain ([[A2]], [[ARG4]] :
+//       CHECK:   cf.br ^bb3({{.*}}, [[A2]], [[UPDATED_OWN_7]]#0
+//       CHECK: ^{{.*}}:
+//       CHECK:   [[BASE9:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
+// TODO: this dealloc can be entirely optimized away
+//       CHECK:   [[UPDATED_OWN_8:%.+]]:2 = bufferization.dealloc ([[BASE9]] :{{.*}}) if ([[UPDATED_OWN_1]]) retain ([[A1]], [[ARG4]] :
+//       CHECK:   cf.br ^bb1({{.*}}, [[A1]], [[UPDATED_OWN_8]]
+//       CHECK: [[EXIT]]:
+//       CHECK:   test.copy
+//       CHECK:   [[BASE10:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+//       CHECK:   bufferization.dealloc ([[BASE10]] :{{.*}}) if ([[UPDATED_OWN]])
+
+// -----
+
+func.func private @while_loop(%arg0: index) {
+  %alloc = memref.alloc(%arg0) : memref<?xf32>
+  cf.br ^check(%alloc, %alloc, %alloc : memref<?xf32>, memref<?xf32>, memref<?xf32>)
+^check(%0: memref<?xf32>, %1: memref<?xf32>, %2: memref<?xf32>):  // 2 preds: ^bb0, ^body
+  %3 = "test.make_condition"() : () -> i1
+  cf.cond_br %3, ^body(%0, %1, %2 : memref<?xf32>, memref<?xf32>, memref<?xf32>), ^exit
+^body(%4: memref<?xf32>, %5: memref<?xf32>, %6: memref<?xf32>):  // pred: ^check
+  %alloc_0 = memref.alloc(%arg0) : memref<?xf32>
+  %alloc_1 = memref.alloc(%arg0) : memref<?xf32>
+  cf.br ^check(%alloc_1, %alloc_0, %5 : memref<?xf32>, memref<?xf32>, memref<?xf32>)
+^exit:  // pred: ^check
+  return
+}
+
+// CHECK-LABEL: func private @while_loop
+//       CHECK:   [[ALLOC:%.+]] = memref.alloc(
+//       CHECK:   cf.br [[CHECK:\^.+]]([[ALLOC]], [[ALLOC]], [[ALLOC]], %true{{[0-9_]*}}, %true{{[0-9_]*}}, %true
+//       CHECK: [[CHECK]]([[I0:%.+]]: memref<?xf32>, [[I1:%.+]]: memref<?xf32>, [[I2:%.+]]: memref<?xf32>, [[I3:%.+]]: i1, [[I4:%.+]]: i1, [[I5:%.+]]: i1):
+//       CHECK:   [[COND:%.+]] = "test.make_condition"
+//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[I0]]
+//       CHECK:   [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[I1]]
+//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[I2]]
+//       CHECK:   [[OWN0:%.+]] = arith.andi [[I3]], [[COND]]
+//       CHECK:   [[OWN1:%.+]] = arith.andi [[I4]], [[COND]]
+//       CHECK:   [[OWN2:%.+]] = arith.andi [[I5]], [[COND]]
+// TODO: this dealloc can be optimized away entirely
+//       CHECK:   [[THEN_UPDATED_OWN:%.+]]:3 = bufferization.dealloc ([[BASE0]], [[BASE1]], [[BASE2]] :{{.*}}) if ([[OWN0]], [[OWN1]], [[OWN2]]) retain ([[I0]], [[I1]], [[I2]] :
+//       CHECK:   [[NEG_COND:%.+]] = arith.xori [[COND]], %true
+//       CHECK:   [[OWN3:%.+]] = arith.andi [[I3]], [[NEG_COND]]
+//       CHECK:   [[NEG_COND:%.+]] = arith.xori [[COND]], %true
+//       CHECK:   [[OWN4:%.+]] = arith.andi [[I4]], [[NEG_COND]]
+//       CHECK:   [[NEG_COND:%.+]] = arith.xori [[COND]], %true
+//       CHECK:   [[OWN5:%.+]] = arith.andi [[I5]], [[NEG_COND]]
+// TODO: it would be good to have an optimization that moves this deallocation to the exit block instead
+//       CHECK:   bufferization.dealloc ([[BASE0]], [[BASE1]], [[BASE2]] :{{.*}}) if ([[OWN3]], [[OWN4]], [[OWN5]])
+//       CHECK:   cf.cond_br [[COND]], [[BODY:\^.+]]([[I0]], [[I1]], [[I2]], [[THEN_UPDATED_OWN]]#0, [[THEN_UPDATED_OWN]]#1, [[THEN_UPDATED_OWN]]#2 :{{.*}}), [[EXIT:\^.+]]
+//       CHECK: [[BODY]]([[A0:%.+]]: memref<?xf32>, [[A1:%.+]]: memref<?xf32>, [[A2:%.+]]: memref<?xf32>, [[A3:%.+]]: i1, [[A4:%.+]]: i1, [[A5:%.+]]: i1):
+//       CHECK:   [[ALLOC0:%.+]] = memref.alloc(
+//       CHECK:   [[ALLOC1:%.+]] = memref.alloc(
+//       CHECK:   [[BASE0:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A0]]
+//       CHECK:   [[BASE1:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A1]]
+//       CHECK:   [[BASE2:%[a-zA-Z0-9_]+]]{{.*}} = memref.extract_strided_metadata [[A2]]
+// TODO: this dealloc op could be considerably simplified by some better analysis
+//       CHECK:   [[UPDATED_OWN:%.+]]:3 = bufferization.dealloc ([[BASE0]], [[BASE1]], [[BASE2]], [[ALLOC0]] :{{.*}}) if ([[A3]], [[A4]], [[A5]], %true{{[0-9_]*}}) retain ([[ALLOC1]], [[ALLOC0]], [[A1]] :
+//       CHECK:   cf.br [[CHECK]]([[ALLOC1]], [[ALLOC0]], [[A1]], %true{{[0-9_]*}}, %true{{[0-9_]*}}, [[UPDATED_OWN]]#2 :
+//       CHECK: [[EXIT]]:
+//       CHECK:   return
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/invalid-buffer-deallocation.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/invalid-buffer-deallocation.mlir
index c623891e48362fa..a7995e37c96c451 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/invalid-buffer-deallocation.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/invalid-buffer-deallocation.mlir
@@ -1,71 +1,5 @@
 // RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation -split-input-file %s
 
-
-// Test Case: explicit control-flow loop with a dynamically allocated buffer.
-// The BufferDeallocation transformation should fail on this explicit
-// control-flow loop since they are not supported.
-
-// expected-error @+1 {{Only structured control-flow loops are supported}}
-func.func @loop_dynalloc(
-  %arg0 : i32,
-  %arg1 : i32,
-  %arg2: memref<?xf32>,
-  %arg3: memref<?xf32>) {
-  %const0 = arith.constant 0 : i32
-  cf.br ^loopHeader(%const0, %arg2 : i32, memref<?xf32>)
-
-^loopHeader(%i : i32, %buff : memref<?xf32>):
-  %lessThan = arith.cmpi slt, %i, %arg1 : i32
-  cf.cond_br %lessThan,
-    ^loopBody(%i, %buff : i32, memref<?xf32>),
-    ^exit(%buff : memref<?xf32>)
-
-^loopBody(%val : i32, %buff2: memref<?xf32>):
-  %const1 = arith.constant 1 : i32
-  %inc = arith.addi %val, %const1 : i32
-  %size = arith.index_cast %inc : i32 to index
-  %alloc1 = memref.alloc(%size) : memref<?xf32>
-  cf.br ^loopHeader(%inc, %alloc1 : i32, memref<?xf32>)
-
-^exit(%buff3 : memref<?xf32>):
-  test.copy(%buff3, %arg3) : (memref<?xf32>, memref<?xf32>)
-  return
-}
-
-// -----
-
-// Test Case: explicit control-flow loop with a dynamically allocated buffer.
-// The BufferDeallocation transformation should fail on this explicit
-// control-flow loop since they are not supported.
-
-// expected-error @+1 {{Only structured control-flow loops are supported}}
-func.func @do_loop_alloc(
-  %arg0 : i32,
-  %arg1 : i32,
-  %arg2: memref<2xf32>,
-  %arg3: memref<2xf32>) {
-  %const0 = arith.constant 0 : i32
-  cf.br ^loopBody(%const0, %arg2 : i32, memref<2xf32>)
-
-^loopBody(%val : i32, %buff2: memref<2xf32>):
-  %const1 = arith.constant 1 : i32
-  %inc = arith.addi %val, %const1 : i32
-  %alloc1 = memref.alloc() : memref<2xf32>
-  cf.br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>)
-
-^loopHeader(%i : i32, %buff : memref<2xf32>):
-  %lessThan = arith.cmpi slt, %i, %arg1 : i32
-  cf.cond_br %lessThan,
-    ^loopBody(%i, %buff : i32, memref<2xf32>),
-    ^exit(%buff : memref<2xf32>)
-
-^exit(%buff3 : memref<2xf32>):
-  test.copy(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>)
-  return
-}
-
-// -----
-
 func.func @free_effect() {
   %alloc = memref.alloc() : memref<2xi32>
   // expected-error @below {{memory free side-effect on MemRef value not supported!}}



More information about the Mlir-commits mailing list