[Mlir-commits] [mlir] [mlir][bufferization] Add OwnershipBasedBufferDeallocation pass option to forbid clones (PR #66626)

Martin Erhart llvmlistbot at llvm.org
Mon Sep 18 07:59:24 PDT 2023


https://github.com/maerhart updated https://github.com/llvm/llvm-project/pull/66626

>From 1647897741df4bd162d2cefd02331e2dc5ae9f29 Mon Sep 17 00:00:00 2001
From: Martin Erhart <merhart at google.com>
Date: Mon, 18 Sep 2023 10:18:31 +0000
Subject: [PATCH] [mlir][bufferization] Add OwnershipBasedBufferDeallocation
 pass option to forbid clones

Adds an `allow-cloning` option (off by default) to the
`ownership-based-buffer-deallocation` pass so that the insertion of
clone operations can be forbidden. This is necessary to support IR
that does not have the property that every buffer write dominates every
buffer read of the same buffer. Instead of silently producing invalid
IR, the pass then emits an error when it would need to insert a clone.
The dominance property is also a restriction of the old
`buffer-deallocation` pass, but the new function boundary ABI was not
enforced in that old pass. Having this option allows easier migration
from the old to the new deallocation pass because enabling cloning
allows the new deallocation pass to fix IR that does not adhere to the
function boundary ABI (in some situations).
---
 .../IR/BufferDeallocationOpInterface.h        |  13 +-
 .../IR/BufferDeallocationOpInterface.td       |   4 +-
 .../Dialect/Bufferization/Pipelines/Passes.h  |  17 ++
 .../Dialect/Bufferization/Transforms/Passes.h |   8 +-
 .../Bufferization/Transforms/Passes.td        |   8 +
 .../BufferDeallocationOpInterfaceImpl.cpp     |   6 +-
 .../IR/BufferDeallocationOpInterface.cpp      |  15 +-
 .../Pipelines/BufferizationPipelines.cpp      |  14 +-
 .../Bufferization/Pipelines/CMakeLists.txt    |   1 +
 .../OwnershipBasedBufferDeallocation.cpp      |  85 ++++---
 .../dealloc-function-boundaries.mlir          |   8 +-
 .../dealloc-memoryeffect-interface.mlir       |  10 +-
 .../dealloc-region-branchop-interface.mlir    | 213 +++++++++++++-----
 .../Linalg/CPU/test-collapse-tensor.mlir      |   2 +-
 .../Linalg/CPU/test-expand-tensor.mlir        |   2 +-
 .../Dialect/Linalg/CPU/test-tensor-e2e.mlir   |   2 +-
 .../llvm-project-overlay/mlir/BUILD.bazel     |   1 +
 17 files changed, 305 insertions(+), 104 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
index 7ac4592de7875fb..3aa61fae8c6caee 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h
@@ -96,6 +96,14 @@ struct DeallocationOptions {
   // pass the ownership of MemRef values instead of adhering to the function
   // boundary ABI.
   bool privateFuncDynamicOwnership = false;
+
+  // Allows the pass to insert `bufferization.clone` operations. This is useful
+  // for supporting IR that does not adhere to the function boundary ABI
+  // initially (excl. external functions) and to support operations with results
+  // with 'Unknown' ownership. However, it requires that all buffer writes
+  // dominate all buffer reads (i.e., only enable this option if your IR is
+  // guaranteed to have this property).
+  bool allowCloning = false;
 };
 
 /// This class collects all the state that we need to perform the buffer
@@ -142,8 +150,9 @@ class DeallocationState {
   /// a new SSA value, returned as the first element of the pair, which has
   /// 'Unique' ownership and can be used instead of the passed Value with the
   /// the ownership indicator returned as the second element of the pair.
-  std::pair<Value, Value>
-  getMemrefWithUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
+  FailureOr<std::pair<Value, Value>>
+  getMemrefWithUniqueOwnership(const DeallocationOptions &options,
+                               OpBuilder &builder, Value memref, Block *block);
 
   /// Given two basic blocks and the values passed via block arguments to the
   /// destination block, compute the list of MemRefs that have to be retained in
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td
index 3e11432c65c5f08..3b9a9c3f4fef667 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.td
@@ -56,7 +56,7 @@ def BufferDeallocationOpInterface :
           method (which is especially important if operations are created that
           cannot be easily canonicalized away anymore).
         }],
-        /*retType=*/"std::pair<Value, Value>",
+        /*retType=*/"FailureOr<std::pair<Value, Value>>",
         /*methodName=*/"materializeUniqueOwnershipForMemref",
         /*args=*/(ins "DeallocationState &":$state,
                       "const DeallocationOptions &":$options,
@@ -65,7 +65,7 @@ def BufferDeallocationOpInterface :
         /*methodBody=*/[{}],
         /*defaultImplementation=*/[{
           return state.getMemrefWithUniqueOwnership(
-            builder, memref, memref.getParentBlock());
+            options, builder, memref, memref.getParentBlock());
         }]>,
   ];
 }
diff --git a/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h
index 7acacb763cd2c18..7500257ed95eac8 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Pipelines/Passes.h
@@ -17,6 +17,7 @@
 
 namespace mlir {
 namespace bufferization {
+struct DeallocationOptions;
 
 /// Options for the buffer deallocation pipeline.
 struct BufferDeallocationPipelineOptions
@@ -28,6 +29,22 @@ struct BufferDeallocationPipelineOptions
           "dynamically pass ownership of memrefs to callees. This can enable "
           "earlier deallocations."),
       llvm::cl::init(false)};
+  PassOptions::Option<bool> allowCloning{
+      *this, "allow-cloning",
+      llvm::cl::desc(
+          "Allows the pass to insert `bufferization.clone` operations. This is "
+          "useful for supporting IR that does not adhere to the function "
+          "boundary ABI initially (excl. external functions) and to support "
+          "operations with results with 'Unknown' ownership. However, it "
+          "requires that all buffer writes dominate all buffer reads (i.e., "
+          "only enable this option if your IR is guaranteed to have this "
+          "property)."),
+      llvm::cl::init(false)};
+
+  /// Convert this BufferDeallocationPipelineOptions struct to a
+  /// DeallocationOptions struct to be passed to the
+  /// OwnershipBasedBufferDeallocationPass.
+  DeallocationOptions asDeallocationOptions() const;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 92520eb13da6875..37a3942f7bac6c5 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -1,6 +1,7 @@
 #ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES_H
 #define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES_H
 
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -31,7 +32,7 @@ std::unique_ptr<Pass> createBufferDeallocationPass();
 /// Creates an instance of the OwnershipBasedBufferDeallocation pass to free all
 /// allocated buffers.
 std::unique_ptr<Pass> createOwnershipBasedBufferDeallocationPass(
-    bool privateFuncDynamicOwnership = false);
+    const DeallocationOptions &options = DeallocationOptions());
 
 /// Creates a pass that optimizes `bufferization.dealloc` operations. For
 /// example, it reduces the number of alias checks needed at runtime using
@@ -134,8 +135,9 @@ func::FuncOp buildDeallocationLibraryFunction(OpBuilder &builder, Location loc,
 LogicalResult deallocateBuffers(Operation *op);
 
 /// Run ownership basedbuffer deallocation.
-LogicalResult deallocateBuffersOwnershipBased(FunctionOpInterface op,
-                                              bool privateFuncDynamicOwnership);
+LogicalResult deallocateBuffersOwnershipBased(
+    FunctionOpInterface op,
+    const DeallocationOptions &options = DeallocationOptions());
 
 /// Creates a pass that moves allocations upwards to reduce the number of
 /// required copies that are inserted during the BufferDeallocation pass.
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 62383e376f6f7a3..5b8af7a975c34b5 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -223,6 +223,14 @@ def OwnershipBasedBufferDeallocation : Pass<
            "Allows to add additional arguments to private functions to "
            "dynamically pass ownership of memrefs to callees. This can enable "
            "earlier deallocations.">,
+    Option<"allowCloning", "allow-cloning", "bool", /*default=*/"false",
+           "Allows the pass to insert `bufferization.clone` operations. This "
+           "is useful for supporting IR that does not adhere to the function "
+           "boundary ABI initially (excl. external functions) and to support "
+           "operations with results with 'Unknown' ownership. However, it "
+           "requires that all buffer writes dominate all buffer reads (i.e., "
+           "only enable this option if your IR is guaranteed to have this "
+           "property).">,
   ];
   let constructor = "mlir::bufferization::createOwnershipBasedBufferDeallocationPass()";
 
diff --git a/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp
index f2e7732e8ea4aa3..8ab4717739a7643 100644
--- a/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/BufferDeallocationOpInterfaceImpl.cpp
@@ -53,7 +53,7 @@ struct SelectOpInterface
     return op; // nothing to do
   }
 
-  std::pair<Value, Value>
+  FailureOr<std::pair<Value, Value>>
   materializeUniqueOwnershipForMemref(Operation *op, DeallocationState &state,
                                       const DeallocationOptions &options,
                                       OpBuilder &builder, Value value) const {
@@ -64,14 +64,14 @@ struct SelectOpInterface
     Block *block = value.getParentBlock();
     if (!state.getOwnership(selectOp.getTrueValue(), block).isUnique() ||
         !state.getOwnership(selectOp.getFalseValue(), block).isUnique())
-      return state.getMemrefWithUniqueOwnership(builder, value,
+      return state.getMemrefWithUniqueOwnership(options, builder, value,
                                                 value.getParentBlock());
 
     Value ownership = builder.create<arith::SelectOp>(
         op->getLoc(), selectOp.getCondition(),
         state.getOwnership(selectOp.getTrueValue(), block).getIndicator(),
         state.getOwnership(selectOp.getFalseValue(), block).getIndicator());
-    return {selectOp.getResult(), ownership};
+    return std::make_pair(selectOp.getResult(), ownership);
   }
 };
 
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
index 407d75e2426e9f9..9ac2e09dec385aa 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferDeallocationOpInterface.cpp
@@ -132,16 +132,21 @@ void DeallocationState::getLiveMemrefsIn(Block *block,
   memrefs.append(liveMemrefs);
 }
 
-std::pair<Value, Value>
-DeallocationState::getMemrefWithUniqueOwnership(OpBuilder &builder,
-                                                Value memref, Block *block) {
+FailureOr<std::pair<Value, Value>>
+DeallocationState::getMemrefWithUniqueOwnership(
+    const DeallocationOptions &options, OpBuilder &builder, Value memref,
+    Block *block) {
   auto iter = ownershipMap.find({memref, block});
   assert(iter != ownershipMap.end() &&
          "Value must already have been registered in the ownership map");
 
   Ownership ownership = iter->second;
   if (ownership.isUnique())
-    return {memref, ownership.getIndicator()};
+    return std::make_pair(memref, ownership.getIndicator());
+
+  if (!options.allowCloning)
+    return emitError(memref.getLoc(),
+                     "MemRef value does not have valid ownership");
 
   // Instead of inserting a clone operation we could also insert a dealloc
   // operation earlier in the block and use the updated ownerships returned by
@@ -155,7 +160,7 @@ DeallocationState::getMemrefWithUniqueOwnership(OpBuilder &builder,
   Value newMemref = cloneOp.getResult();
   updateOwnership(newMemref, condition);
   memrefsToDeallocatePerBlock[newMemref.getParentBlock()].push_back(newMemref);
-  return {newMemref, condition};
+  return std::make_pair(newMemref, condition);
 }
 
 void DeallocationState::getMemrefsToRetain(
diff --git a/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp b/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp
index b2a60feb9a7f011..f08de33345ce605 100644
--- a/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp
+++ b/mlir/lib/Dialect/Bufferization/Pipelines/BufferizationPipelines.cpp
@@ -8,23 +8,35 @@
 
 #include "mlir/Dialect/Bufferization/Pipelines/Passes.h"
 
+#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
 
+using namespace mlir;
+using namespace bufferization;
+
 //===----------------------------------------------------------------------===//
 // Pipeline implementation.
 //===----------------------------------------------------------------------===//
 
+DeallocationOptions
+BufferDeallocationPipelineOptions::asDeallocationOptions() const {
+  DeallocationOptions opts;
+  opts.privateFuncDynamicOwnership = privateFunctionDynamicOwnership.getValue();
+  opts.allowCloning = allowCloning.getValue();
+  return opts;
+}
+
 void mlir::bufferization::buildBufferDeallocationPipeline(
     OpPassManager &pm, const BufferDeallocationPipelineOptions &options) {
   pm.addNestedPass<func::FuncOp>(
       memref::createExpandReallocPass(/*emitDeallocs=*/false));
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createOwnershipBasedBufferDeallocationPass(
-      options.privateFunctionDynamicOwnership.getValue()));
+      options.asDeallocationOptions()));
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createBufferDeallocationSimplificationPass());
   pm.addPass(createLowerDeallocationsPass());
diff --git a/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt
index 6e8dab64ba6b935..d67b28b308fa10e 100644
--- a/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Pipelines/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRBufferizationPipelines
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Bufferization
 
   LINK_LIBS PUBLIC
+  MLIRBufferizationDialect
   MLIRBufferizationTransforms
   MLIRMemRefTransforms
   MLIRFuncDialect
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
index 43ba11cf132cb92..bc76d6fabfc3f22 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp
@@ -139,10 +139,8 @@ namespace {
 /// program have a corresponding de-allocation.
 class BufferDeallocation {
 public:
-  BufferDeallocation(Operation *op, bool privateFuncDynamicOwnership)
-      : state(op) {
-    options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;
-  }
+  BufferDeallocation(Operation *op, DeallocationOptions options)
+      : state(op), options(options) {}
 
   /// Performs the actual placement/creation of all dealloc operations.
   LogicalResult deallocate(FunctionOpInterface op);
@@ -376,8 +374,9 @@ class BufferDeallocation {
   /// Given an SSA value of MemRef type, returns the same of a new SSA value
   /// which has 'Unique' ownership where the ownership indicator is guaranteed
   /// to be always 'true'.
-  Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
-                                                 Value memref, Block *block);
+  FailureOr<Value> materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
+                                                            Value memref,
+                                                            Block *block);
 
   /// Returns whether the given operation implements FunctionOpInterface, has
   /// private visibility, and the private-function-dynamic-ownership pass option
@@ -391,7 +390,7 @@ class BufferDeallocation {
   /// is requested does not match the block in which 'memref' is defined, the
   /// default implementation in
   /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
-  std::pair<Value, Value>
+  FailureOr<std::pair<Value, Value>>
   materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
 
   /// Checks all the preconditions for operations implementing the
@@ -430,7 +429,7 @@ class BufferDeallocation {
   DeallocationState state;
 
   /// Collects all pass options in a single place.
-  DeallocationOptions options;
+  const DeallocationOptions options;
 };
 
 } // namespace
@@ -439,13 +438,13 @@ class BufferDeallocation {
 // BufferDeallocation Implementation
 //===----------------------------------------------------------------------===//
 
-std::pair<Value, Value>
+FailureOr<std::pair<Value, Value>>
 BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
                                                Block *block) {
   // The interface can only materialize ownership indicators in the same block
   // as the defining op.
   if (memref.getParentBlock() != block)
-    return state.getMemrefWithUniqueOwnership(builder, memref, block);
+    return state.getMemrefWithUniqueOwnership(options, builder, memref, block);
 
   Operation *owner = memref.getDefiningOp();
   if (!owner)
@@ -458,7 +457,7 @@ BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
         state, options, builder, memref);
 
   // Otherwise use the default implementation.
-  return state.getMemrefWithUniqueOwnership(builder, memref, block);
+  return state.getMemrefWithUniqueOwnership(options, builder, memref, block);
 }
 
 static bool regionOperatesOnMemrefValues(Region &region) {
@@ -710,13 +709,17 @@ BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
   return newOp.getOperation();
 }
 
-Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
+FailureOr<Value> BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
     OpBuilder &builder, Value memref, Block *block) {
   // First, make sure we at least have 'Unique' ownership already.
-  std::pair<Value, Value> newMemrefAndOnwership =
+  FailureOr<std::pair<Value, Value>> newMemrefAndOnwership =
       materializeUniqueOwnership(builder, memref, block);
-  Value newMemref = newMemrefAndOnwership.first;
-  Value condition = newMemrefAndOnwership.second;
+
+  if (failed(newMemrefAndOnwership))
+    return failure();
+
+  Value newMemref = newMemrefAndOnwership->first;
+  Value condition = newMemrefAndOnwership->second;
 
   // Avoid inserting additional IR if ownership is already guaranteed. In
   // particular, this is already the case when we had 'Unknown' ownership
@@ -817,8 +820,13 @@ FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
         newOperands.push_back(operand);
         continue;
       }
-      auto [memref, condition] =
+      FailureOr<std::pair<Value, Value>> memrefAndCondition =
           materializeUniqueOwnership(builder, operand, op->getBlock());
+
+      if (failed(memrefAndCondition))
+        return failure();
+
+      auto [memref, condition] = *memrefAndCondition;
       newOperands.push_back(memref);
       ownershipIndicatorsToAdd.push_back(condition);
     }
@@ -901,8 +909,28 @@ BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
       if (!isMemref(val.get()))
         continue;
 
-      val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
-                                                       op->getBlock()));
+      if (options.allowCloning) {
+        // Here we assume that all memref write operations dominate all memref
+        // read operations, but the function boundary ABI of non-external
+        // functions does not necessarily have to be adhered to.
+        FailureOr<Value> newMemref = materializeMemrefWithGuaranteedOwnership(
+            builder, val.get(), op->getBlock());
+
+        if (failed(newMemref))
+          return failure();
+
+        val.set(*newMemref);
+      } else {
+        // Here memref writes don't have to dominate reads, but the function
+        // boundary ABI has to be adhered to from the start.
+        FailureOr<std::pair<Value, Value>> newMemref =
+            materializeUniqueOwnership(builder, val.get(), op->getBlock());
+
+        if (failed(newMemref))
+          return failure();
+
+        val.set(newMemref->first);
+      }
     }
   }
 
@@ -995,17 +1023,21 @@ struct OwnershipBasedBufferDeallocationPass
     : public bufferization::impl::OwnershipBasedBufferDeallocationBase<
           OwnershipBasedBufferDeallocationPass> {
   OwnershipBasedBufferDeallocationPass() = default;
-  OwnershipBasedBufferDeallocationPass(bool privateFuncDynamicOwnership)
+  OwnershipBasedBufferDeallocationPass(const DeallocationOptions &options)
       : OwnershipBasedBufferDeallocationPass() {
-    this->privateFuncDynamicOwnership.setValue(privateFuncDynamicOwnership);
+    privateFuncDynamicOwnership.setValue(options.privateFuncDynamicOwnership);
+    allowCloning.setValue(options.allowCloning);
   }
   void runOnOperation() override {
     func::FuncOp func = getOperation();
     if (func.isExternal())
       return;
 
-    if (failed(
-            deallocateBuffersOwnershipBased(func, privateFuncDynamicOwnership)))
+    DeallocationOptions options;
+    options.privateFuncDynamicOwnership =
+        privateFuncDynamicOwnership.getValue();
+    options.allowCloning = allowCloning.getValue();
+    if (failed(deallocateBuffersOwnershipBased(func, options)))
       signalPassFailure();
   }
 };
@@ -1017,9 +1049,9 @@ struct OwnershipBasedBufferDeallocationPass
 //===----------------------------------------------------------------------===//
 
 LogicalResult bufferization::deallocateBuffersOwnershipBased(
-    FunctionOpInterface op, bool privateFuncDynamicOwnership) {
+    FunctionOpInterface op, const DeallocationOptions &options) {
   // Gather all required allocation nodes and prepare the deallocation phase.
-  BufferDeallocation deallocation(op, privateFuncDynamicOwnership);
+  BufferDeallocation deallocation(op, options);
 
   // Place all required temporary clone and dealloc nodes.
   return deallocation.deallocate(op);
@@ -1031,7 +1063,6 @@ LogicalResult bufferization::deallocateBuffersOwnershipBased(
 
 std::unique_ptr<Pass>
 mlir::bufferization::createOwnershipBasedBufferDeallocationPass(
-    bool privateFuncDynamicOwnership) {
-  return std::make_unique<OwnershipBasedBufferDeallocationPass>(
-      privateFuncDynamicOwnership);
+    const DeallocationOptions &options) {
+  return std::make_unique<OwnershipBasedBufferDeallocationPass>(options);
 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir
index 13c55d0289880ef..387df9aaa688ae5 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-function-boundaries.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt --allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=false \
+// RUN: mlir-opt --allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=allow-cloning=true \
 // RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
 // RUN: mlir-opt --allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true \
 // RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s --check-prefix=CHECK-DYNAMIC
@@ -92,6 +92,12 @@ func.func private @redundantOperations(%arg0: memref<2xf32>) {
 // since they are operands of return operation and should escape from
 // deallocating. It should dealloc %y after CopyOp.
 
+// Note: when dynamic ownership is disabled, we need to allow cloning in this 
+// example because a function argument is returned again which is against the
+// function boundary ABI. Buffer deallocation will fix this by inserting an
+// additional clone operation, but as a prerequisite all buffer writes have to
+// dominate all buffer reads.
+
 func.func private @memref_in_function_results(
   %arg0: memref<5xf32>,
   %arg1: memref<10xf32>,
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
index 44cf16385603e07..93c7c7b23775202 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-memoryeffect-interface.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation \
+// RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=allow-cloning=true \
 // RUN:   --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
 // RUN: mlir-opt -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null
 
@@ -110,6 +110,14 @@ func.func @dealloc_existing_clones(%arg0: memref<?x?xf64>, %arg1: memref<?x?xf64
 
 // -----
 
+// Note: memref.get_global does not provide ownership of the memref it returns
+// because a global constant must not be deallocated. However, the function
+// boundary ABI requires to return ownership for function results. Enabling
+// "allow-cloning" fixes this issue automatically but requires buffer writes to
+// dominate all buffer reads (not just for this memref but for all of them in
+// the IR). Otherwise, a clone operation has to be inserted manually before
+// running buffer deallocation.
+
 memref.global "private" constant @__constant_4xf32 : memref<4xf32> = dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]>
 
 func.func @op_without_aliasing_and_allocation() -> memref<4xf32> {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir
index dc372749fc074be..bac43d4109fb2fb 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation/dealloc-region-branchop-interface.mlir
@@ -1,6 +1,8 @@
 // RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation \
 // RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck %s
 // RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=private-function-dynamic-ownership=true -split-input-file %s > /dev/null
+// RUN: mlir-opt -allow-unregistered-dialect -verify-diagnostics -ownership-based-buffer-deallocation=allow-cloning=true \
+// RUN:  --buffer-deallocation-simplification -split-input-file %s | FileCheck --check-prefix=CLONES %s
 
 // RUN: mlir-opt %s -buffer-deallocation-pipeline --split-input-file --verify-diagnostics > /dev/null
 
@@ -55,6 +57,8 @@ func.func @nested_regions_and_cond_branch(
 //  CHECK-NEXT:   bufferization.dealloc ([[BASE]] : {{.*}}) if ([[COND0]])
 //       CHECK:   return
 
+// CLONES-LABEL: func @nested_regions_and_cond_branch
+
 // -----
 
 // Test Case: nested region control flow
@@ -85,13 +89,23 @@ func.func @nested_region_control_flow(
 //       CHECK:     bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
 //   CHECK-NOT: retain
 //       CHECK:     scf.yield [[ALLOC]], %false
-//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:     scf.yield [[V0]]#0
-//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:     scf.yield [[CLONE]]
-//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK:   return [[V1]]
+//       CHECK:   return [[V0]]#0
+
+// CLONES-LABEL: func @nested_region_control_flow
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc(
+//       CLONES:   [[V0:%.+]]:2 = scf.if
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:     [[ALLOC1:%.+]] = memref.alloc(
+//       CLONES:     bufferization.dealloc ([[ALLOC1]] :{{.*}}) if (%true{{[0-9_]*}})
+//   CLONES-NOT: retain
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:   [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:     scf.yield [[V0]]#0
+//       CLONES:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:     scf.yield [[CLONE]]
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES:   bufferization.dealloc ([[ALLOC]], [[BASE]] : {{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES:   return [[V1]]
 
 // -----
 
@@ -120,13 +134,22 @@ func.func @nested_region_control_flow_div(
 //       CHECK:     scf.yield [[ALLOC]], %false
 //       CHECK:     [[ALLOC1:%.+]] = memref.alloc(
 //       CHECK:     scf.yield [[ALLOC1]], %true
-//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:     scf.yield [[V0]]#0
-//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:     scf.yield [[CLONE]]
-//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK:   return [[V1]]
+//       CHECK:   bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}}) retain ([[V0]]#0 :
+//       CHECK:   return [[V0]]#0
+
+// CLONES-LABEL: func @nested_region_control_flow_div
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc(
+//       CLONES:   [[V0:%.+]]:2 = scf.if
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:     [[ALLOC1:%.+]] = memref.alloc(
+//       CLONES:     scf.yield [[ALLOC1]], %true
+//       CLONES:   [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:     scf.yield [[V0]]#0
+//       CLONES:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:     scf.yield [[CLONE]]
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES:   return [[V1]]
 
 // -----
 
@@ -158,13 +181,25 @@ func.func @inner_region_control_flow(%arg0 : index) -> memref<?x?xf32> {
 //       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
 //       CHECK:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
 //       CHECK:     test.region_if_yield [[ARG1]], [[ARG2]]
-//       CHECK:   [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:     scf.yield [[V0]]#0
-//       CHECK:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:     scf.yield [[CLONE]]
-//       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK:   return [[V1]]
+//   CHECK-NOT:   bufferization.dealloc
+//       CHECK:   return [[V0]]#0
+
+// CLONES-LABEL: func.func @inner_region_control_flow
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc(
+//       CLONES:   [[V0:%.+]]:2 = test.region_if [[ALLOC]], %false
+//       CLONES:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
+//       CLONES:     test.region_if_yield [[ARG1]], [[ARG2]]
+//       CLONES:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
+//       CLONES:     test.region_if_yield [[ARG1]], [[ARG2]]
+//       CLONES:   ^bb0([[ARG1:%.+]]: memref<?x?xf32>, [[ARG2:%.+]]: i1):
+//       CLONES:     test.region_if_yield [[ARG1]], [[ARG2]]
+//       CLONES:   [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:     scf.yield [[V0]]#0
+//       CLONES:     [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:     scf.yield [[CLONE]]
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES:   bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES:   return [[V1]]
 
 // -----
 
@@ -209,6 +244,8 @@ func.func @nestedRegionsAndCondBranchAlloca(
 //       CHECK:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[A0]]
 //       CHECK:   bufferization.dealloc ([[BASE]] :{{.*}}) if ([[COND]])
 
+// CLONES-LABEL: func @nestedRegionsAndCondBranchAlloca
+
 // -----
 
 func.func @nestedRegionControlFlowAlloca(
@@ -232,13 +269,22 @@ func.func @nestedRegionControlFlowAlloca(
 //       CHECK:   scf.yield [[ALLOC]], %false
 //       CHECK:   memref.alloca(
 //       CHECK:   scf.yield [[ALLOC]], %false
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK: return [[V1]]
+//   CHECK-NOT: bufferization.dealloc
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @nestedRegionControlFlowAlloca
+//       CLONES: [[ALLOC:%.+]] = memref.alloc(
+//       CLONES: [[V0:%.+]]:2 = scf.if
+//       CLONES:   scf.yield [[ALLOC]], %false
+//       CLONES:   memref.alloca(
+//       CLONES:   scf.yield [[ALLOC]], %false
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES: return [[V1]]
 
 // -----
 
@@ -279,6 +325,8 @@ func.func @loop_alloc(
 //       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1)
 //   CHECK-NOT: retain
 
+// CLONES-LABEL: func @loop_alloc
+
 // -----
 
 // Test Case: structured control-flow loop with a nested if operation.
@@ -326,6 +374,8 @@ func.func @loop_nested_if_no_alloc(
 // TODO: we know statically that the inner dealloc will never deallocate
 //       anything, i.e., we can optimize it away
 
+// CLONES-LABEL: func @loop_nested_if_no_alloc
+
 // -----
 
 // Test Case: structured control-flow loop with a nested if operation using
@@ -364,13 +414,29 @@ func.func @loop_nested_if_alloc(
 //       CHECK:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
 //       CHECK:   scf.yield [[V1]]#0, [[OWN_AGG]]
 //       CHECK: }
-//       CHECK: [[V2:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
-//       CHECK: return [[V2]]
+//       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true{{[0-9_]*}}) retain ([[V0]]#0 :
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @loop_nested_if_alloc
+//  CLONES-SAME: ({{.*}}, [[ARG3:%.+]]: memref<2xf32>)
+//       CLONES: [[ALLOC:%.+]] = memref.alloc()
+//       CLONES: [[V0:%.+]]:2 = scf.for {{.*}} iter_args([[ARG5:%.+]] = [[ARG3]], [[ARG6:%.+]] = %false
+//       CLONES:   [[V1:%.+]]:2 = scf.if
+//       CLONES:     [[ALLOC1:%.+]] = memref.alloc()
+//       CLONES:     scf.yield [[ALLOC1]], %true
+//       CLONES:     scf.yield [[ALLOC]], %false
+//       CLONES:   [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[ARG5]]
+//       CLONES:   [[OWN:%.+]] = bufferization.dealloc ([[BASE]] :{{.*}}) if ([[ARG6]]) retain ([[V1]]#0 :
+//       CLONES:   [[OWN_AGG:%.+]] = arith.ori [[OWN]], [[V1]]#1
+//       CLONES:   scf.yield [[V1]]#0, [[OWN_AGG]]
+//       CLONES: }
+//       CLONES: [[V2:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V2]] :
+//       CLONES: return [[V2]]
 
 // -----
 
@@ -447,6 +513,8 @@ func.func @loop_nested_alloc(
 
 // TODO: all the retain operands could be removed by doing some more thorough analysis
 
+// CLONES-LABEL: func @loop_nested_alloc
+
 // -----
 
 func.func @affine_loop() -> f32 {
@@ -466,6 +534,8 @@ func.func @affine_loop() -> f32 {
 //       CHECK:   affine.yield
 //       CHECK: bufferization.dealloc ([[ALLOC]] :{{.*}}) if (%true
 
+// CLONES-LABEL: func @affine_loop
+
 // -----
 
 func.func @assumingOp(
@@ -508,6 +578,8 @@ func.func @assumingOp(
 //   CHECK-NOT: retain
 //       CHECK: return
 
+// CLONES-LABEL: func @assumingOp
+
 // -----
 
 // Test Case: The op "test.bar" does not implement the RegionBranchOpInterface.
@@ -570,6 +642,8 @@ func.func @while_two_arg(%arg0: index) {
 //       CHECK: [[BASE1:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#1
 //       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE0]], [[BASE1]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#2, [[V0]]#3)
 
+// CLONES-LABEL: func @while_two_arg
+
 // -----
 
 func.func @while_three_arg(%arg0: index) {
@@ -606,6 +680,8 @@ func.func @while_three_arg(%arg0: index) {
 
 // TODO: better alias analysis could simplify the dealloc inside the body further
 
+// CLONES-LABEL: func @while_three_arg
+
 // -----
 
 // Memref allocated in `then` region and passed back to the parent if op.
@@ -626,17 +702,25 @@ func.func @test_affine_if_1(%arg0: memref<10xf32>) -> memref<10xf32> {
 //       CHECK:   [[ALLOC:%.+]] = memref.alloc()
 //       CHECK:   affine.yield [[ALLOC]], %true
 //       CHECK:   affine.yield [[ARG0]], %false
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
-//       CHECK: return [[V1]]
+//       CHECK: return [[V0]]#0
 
 // TODO: the dealloc could be optimized away since the memref to be deallocated
 //       either aliases with V1 or the condition is false
 
+// CLONES-LABEL: func @test_affine_if_1
+//  CLONES-SAME: ([[ARG0:%.*]]: memref<10xf32>)
+//       CLONES: [[V0:%.+]]:2 = affine.if
+//       CLONES:   [[ALLOC:%.+]] = memref.alloc()
+//       CLONES:   affine.yield [[ALLOC]], %true
+//       CLONES:   affine.yield [[ARG0]], %false
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[BASE]] :{{.*}}) if ([[V0]]#1) retain ([[V1]] :
+//       CLONES: return [[V1]]
+
 // -----
 
 // Memref allocated before parent IfOp and used in `then` region.
@@ -652,19 +736,28 @@ func.func @test_affine_if_2() -> memref<10xf32> {
   }
   return %0 : memref<10xf32>
 }
+
 // CHECK-LABEL: func @test_affine_if_2
 //       CHECK: [[ALLOC:%.+]] = memref.alloc()
 //       CHECK: [[V0:%.+]]:2 = affine.if
 //       CHECK:   affine.yield [[ALLOC]], %false
 //       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
 //       CHECK:   affine.yield [[ALLOC1]], %true
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
-//       CHECK: return [[V1]]
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @test_affine_if_2
+//       CLONES: [[ALLOC:%.+]] = memref.alloc()
+//       CLONES: [[V0:%.+]]:2 = affine.if
+//       CLONES:   affine.yield [[ALLOC]], %false
+//       CLONES:   [[ALLOC1:%.+]] = memref.alloc()
+//       CLONES:   affine.yield [[ALLOC1]], %true
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]] :
+//       CLONES: return [[V1]]
 
 // -----
 
@@ -688,10 +781,18 @@ func.func @test_affine_if_3() -> memref<10xf32> {
 //       CHECK:   [[ALLOC1:%.+]] = memref.alloc()
 //       CHECK:   affine.yield [[ALLOC1]], %true
 //       CHECK:   affine.yield [[ALLOC]], %false
-//       CHECK: [[V1:%.+]] = scf.if [[V0]]#1
-//       CHECK:   scf.yield [[V0]]#0
-//       CHECK:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
-//       CHECK:   scf.yield [[CLONE]]
-//       CHECK: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
-//       CHECK: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
-//       CHECK: return [[V1]]
+//       CHECK: return [[V0]]#0
+
+// CLONES-LABEL: func @test_affine_if_3
+//       CLONES: [[ALLOC:%.+]] = memref.alloc()
+//       CLONES: [[V0:%.+]]:2 = affine.if
+//       CLONES:   [[ALLOC1:%.+]] = memref.alloc()
+//       CLONES:   affine.yield [[ALLOC1]], %true
+//       CLONES:   affine.yield [[ALLOC]], %false
+//       CLONES: [[V1:%.+]] = scf.if [[V0]]#1
+//       CLONES:   scf.yield [[V0]]#0
+//       CLONES:   [[CLONE:%.+]] = bufferization.clone [[V0]]#0
+//       CLONES:   scf.yield [[CLONE]]
+//       CLONES: [[BASE:%[a-zA-Z0-9_]+]],{{.*}} = memref.extract_strided_metadata [[V0]]#0
+//       CLONES: bufferization.dealloc ([[ALLOC]], [[BASE]] :{{.*}}) if (%true{{[0-9_]*}}, [[V0]]#1) retain ([[V1]]
+//       CLONES: return [[V1]]
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
index 43e423d4c3e8e14..768fde5d6dcb992 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline=allow-cloning=true -convert-bufferization-to-memref \
 // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \
 // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
index a101b76ef186b5e..0db04eb68efa416 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline=allow-cloning=true -convert-bufferization-to-memref \
 // RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \
 // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
index 38b49cd444df3c1..521e669e890c84a 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -arith-bufferize -linalg-bufferize \
-// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \
+// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline=allow-cloning=true -convert-bufferization-to-memref -convert-linalg-to-loops \
 // RUN: -convert-arith-to-llvm -convert-scf-to-cf -convert-cf-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 5d1574162aa690c..e96d17976186862 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -12223,6 +12223,7 @@ cc_library(
     hdrs = ["include/mlir/Dialect/Bufferization/Pipelines/Passes.h"],
     includes = ["include"],
     deps = [
+        ":BufferizationDialect",
         ":BufferizationToMemRef",
         ":BufferizationTransforms",
         ":FuncDialect",



More information about the Mlir-commits mailing list