[Mlir-commits] [mlir] [mlir][bufferization] Remove allow-return-allocs and create-deallocs pass options, remove bufferization.escape attribute (PR #66619)

Martin Erhart llvmlistbot at llvm.org
Mon Sep 18 02:02:56 PDT 2023


https://github.com/maerhart created https://github.com/llvm/llvm-project/pull/66619

This commit removes the deallocation capabilities of One-Shot Bufferize. One-Shot Bufferize should never deallocate any memrefs; going forward, deallocation is handled entirely by the ownership-based buffer deallocation pass. As a consequence, the `allow-return-allocs` pass option now effectively defaults to true and `create-deallocs` to false, and both options, as well as the `bufferization.escape` attribute (which indicated whether a memref escapes the current region), are removed. A new `allow-return-allocs-from-loops` option is added as a temporary workaround for some bufferization limitations.

Already reviewed in https://reviews.llvm.org/D156662

Depends on #66517.
Therefore, only the top commit needs to be reviewed.
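
For reference, the intended workflow after this change is a two-step pipeline: One-Shot Bufferize no longer emits any deallocations, and the ownership-based deallocation pipeline is run afterwards. A minimal sketch (pass names as used in the updated RUN lines of this patch; `input.mlir`, `bufferized.mlir`, and `out.mlir` are placeholders):

```
# Step 1: bufferize; no memref.dealloc ops are created anymore.
mlir-opt input.mlir \
  -one-shot-bufferize="bufferize-function-boundaries" -o bufferized.mlir

# Step 2: insert deallocations with the ownership-based pipeline and
# lower the remaining bufferization ops to memref.
mlir-opt bufferized.mlir \
  -buffer-deallocation-pipeline -convert-bufferization-to-memref -o out.mlir
```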

From f9f77e2d94dc09ce4ac4fbb1c44822e5215e0236 Mon Sep 17 00:00:00 2001
From: Martin Erhart <merhart at google.com>
Date: Fri, 15 Sep 2023 11:08:53 +0200
Subject: [PATCH 1/2] [mlir][bufferization] Switch tests to new deallocation
 pass pipeline

Use the new ownership-based deallocation pass pipeline in the regression
and integration tests. Some one-shot bufferization tests exercised
one-shot bufferize and deallocation at the same time; I removed the
deallocation pass there because the deallocation pass is already
thoroughly tested on its own.
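
Concretely, the regression tests mostly just drop -buffer-deallocation
from their RUN lines, while the integration tests replace it along the
lines of the following sketch (simplified; test.mlir is a placeholder
and per-test bufferization/lowering passes are omitted):

```
# Old: copy-based deallocation pass right after bufferization.
mlir-opt test.mlir -linalg-bufferize -func-bufferize -finalizing-bufferize \
  -buffer-deallocation

# New: ownership-based deallocation pipeline, plus lowering of the
# bufferization ops it leaves behind. Several integration tests also add
# passes such as -convert-scf-to-cf and -convert-cf-to-llvm, presumably
# to lower control flow introduced by the new pipeline.
mlir-opt test.mlir -linalg-bufferize -func-bufferize -finalizing-bufferize \
  -buffer-deallocation-pipeline -convert-bufferization-to-memref
```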

Fixed version of #66471
---
 ...ne-shot-bufferize-allow-return-allocs.mlir |  11 +-
 .../Transforms/one-shot-bufferize-compat.mlir |   2 +-
 .../one-shot-module-bufferize-out-params.mlir |  14 +--
 mlir/test/Dialect/SCF/one-shot-bufferize.mlir | 104 ++++--------------
 .../Linalg/CPU/test-collapse-tensor.mlir      |   4 +-
 .../Dialect/Linalg/CPU/test-elementwise.mlir  |   8 +-
 .../Linalg/CPU/test-expand-tensor.mlir        |   4 +-
 .../Dialect/Linalg/CPU/test-padtensor.mlir    |   5 +-
 .../test-subtensor-insert-multiple-uses.mlir  |   6 +-
 .../Linalg/CPU/test-subtensor-insert.mlir     |   6 +-
 .../Dialect/Linalg/CPU/test-tensor-e2e.mlir   |   4 +-
 .../Linalg/CPU/test-tensor-matmul.mlir        |   4 +-
 12 files changed, 54 insertions(+), 118 deletions(-)

diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
index ab9360e45c65ff3..bccff8ef8d65aaa 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs allow-unknown-ops" -buffer-deallocation -canonicalize -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
@@ -14,20 +14,17 @@ func.func @buffer_not_deallocated(%t : tensor<?xf32>, %c : i1) -> tensor<?xf32>
     // CHECK: %[[some_op:.*]] = "test.some_op"
     // CHECK: %[[alloc:.*]] = memref.alloc(%[[some_op]])
     // CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
-    // CHECK-NOT: dealloc
     // CHECK: scf.yield %[[casted]]
     %sz = "test.some_op"() : () -> (index)
     %0 = bufferization.alloc_tensor(%sz) : tensor<?xf32>
     scf.yield %0 : tensor<?xf32>
   } else {
   // CHECK: } else {
-    // CHECK: %[[cloned:.*]] = bufferization.clone %[[m]]
-    // CHECK: scf.yield %[[cloned]]
+    // CHECK: scf.yield %[[m]]
     scf.yield %t : tensor<?xf32>
   }
   // CHECK: }
   // CHECK: %[[r_tensor:.*]] = bufferization.to_tensor %[[r]]
-  // CHECK: memref.dealloc %[[r]]
   // CHECK: return %[[r_tensor]]
   return %r : tensor<?xf32>
 }
@@ -42,8 +39,7 @@ func.func @write_to_alloc_tensor_or_readonly_tensor(%arg0: tensor<i32>,
 {
   // CHECK: %[[arg0_m:.*]] = bufferization.to_memref %[[arg0]]
   // CHECK: %[[r:.*]] = scf.if {{.*}} {
-  // CHECK:   %[[clone:.*]] = bufferization.clone %[[arg0_m]]
-  // CHECK:   scf.yield %[[clone]]
+  // CHECK:   scf.yield %[[arg0_m]]
   // CHECK: } else {
   // CHECK:   %[[alloc:.*]] = memref.alloc
   // CHECK:   memref.store %{{.*}}, %[[alloc]]
@@ -51,7 +47,6 @@ func.func @write_to_alloc_tensor_or_readonly_tensor(%arg0: tensor<i32>,
   // CHECK:   scf.yield %[[casted]]
   // CHECK: }
   // CHECK: %[[r_t:.*]] = bufferization.to_tensor %[[r]]
-  // CHECK: memref.dealloc %[[r]]
   // CHECK: return %[[r_t]]
   %3 = scf.if %cond -> (tensor<i32>) {
     scf.yield %arg0 : tensor<i32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
index 06c79d450cea705..9ebd60f50328e14 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
@@ -5,7 +5,7 @@
 
 // RUN: mlir-opt %s \
 // RUN:     -one-shot-bufferize="allow-unknown-ops create-deallocs=0" \
-// RUN:     -buffer-deallocation | \
+// RUN:     -buffer-deallocation-pipeline | \
 // RUN: FileCheck %s --check-prefix=CHECK-BUFFERDEALLOC
 
 // CHECK-NODEALLOC-LABEL: func @out_of_place_bufferization
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
index 5d876e935df1af6..4e4340c9db8acea 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -buffer-deallocation -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -buffer-deallocation -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE
 
 // Note: function-boundary-type-conversion=infer-layout-map with
 // promote-buffer-results-to-out-params is an unsupported combination.
@@ -18,7 +18,6 @@
 //       CHECK:   memref.store %{{.*}}, %[[alloc]]
 //       CHECK:   %[[casted:.*]] = memref.cast %[[alloc]]
 //       CHECK:   memref.copy %[[casted]], %[[arg1]]
-//       CHECK:   memref.dealloc %[[alloc]]
 //       CHECK:   return
 //       CHECK: }
 
@@ -29,7 +28,6 @@
 //       CHECK-NO-LAYOUT:   memref.copy %[[arg0]], %[[alloc]]
 //       CHECK-NO-LAYOUT:   memref.store {{.*}}, %[[alloc]]
 //       CHECK-NO-LAYOUT:   memref.copy %[[alloc]], %[[arg1]]
-//       CHECK-NO-LAYOUT:   memref.dealloc %[[alloc]]
 
 // CHECK-BASELINE-LABEL: func @callee(
 //  CHECK-BASELINE-SAME:     %[[arg0:.*]]: memref<5xf32, strided<[?], offset: ?>>) -> memref<5xf32> {
@@ -53,7 +51,6 @@ func.func @callee(%t: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) {
 // CHECK:   call @callee(%[[arg0]], %[[casted]])
 // CHECK:   %[[l1:.*]] = memref.load %[[arg0]]
 // CHECK:   %[[l2:.*]] = memref.load %[[casted]]
-// CHECK:   memref.dealloc %[[alloc]]
 // CHECK:   return %[[l1]], %[[l2]]
 // CHECK: }
 
@@ -78,7 +75,6 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
 //       CHECK:   %[[subview:.*]] = memref.subview %[[alloc]]{{.*}} : memref<10x20xf32> to memref<2x5xf32, strided<[20, 1], offset: ?>>
 //       CHECK:   %[[casted:.*]] = memref.cast %[[subview]]
 //       CHECK:   memref.copy %[[casted]], %[[r]]
-//       CHECK:   memref.dealloc %[[alloc]]
 
 // CHECK-NO-LAYOUT-LABEL: func @callee(
 //  CHECK-NO-LAYOUT-SAME:              %{{.*}}: index,
@@ -90,9 +86,7 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
 // value and function signature.
 //       CHECK-NO-LAYOUT:   %[[alloc2:.*]] = memref.alloc() : memref<2x5xf32>
 //       CHECK-NO-LAYOUT:   memref.copy %[[subview]], %[[alloc2]]
-//       CHECK-NO-LAYOUT:   memref.dealloc %[[alloc]]
 //       CHECK-NO-LAYOUT:   memref.copy %[[alloc2]], %[[r]]
-//       CHECK-NO-LAYOUT:   memref.dealloc %[[alloc2]]
 
 // CHECK-BASELINE-LABEL: func @callee(
 //  CHECK-BASELINE-SAME:     %{{.*}}: index) -> memref<2x5xf32, strided<[20, 1], offset: ?>> {
@@ -110,13 +104,11 @@ func.func @callee(%idx: index) -> tensor<2x5xf32> {
 // CHECK:   %[[casted:.*]] = memref.cast %[[alloc]] : memref<2x5xf32> to memref<2x5xf32, strided<[?, ?], offset: ?>>
 // CHECK:   call @callee(%{{.*}}, %[[casted]])
 // CHECK:   memref.load %[[casted]]
-// CHECK:   memref.dealloc %[[alloc]]
 
 // CHECK-NO-LAYOUT: func @main(
 // CHECK-NO-LAYOUT:   %[[alloc:.*]] = memref.alloc() : memref<2x5xf32>
 // CHECK-NO-LAYOUT:   call @callee(%{{.*}}, %[[alloc]])
 // CHECK-NO-LAYOUT:   memref.load %[[alloc]]
-// CHECK-NO-LAYOUT:   memref.dealloc
 
 // CHECK-BASELINE: func @main(
 // CHECK-BASELINE:   %[[call:.*]] = call @callee
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index b01592dcad74488..a8c488461c74edd 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -1,5 +1,4 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -buffer-deallocation -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-NO-DEALLOC-PASS
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
@@ -7,7 +6,7 @@
 // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -buffer-deallocation -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // CHECK-LABEL: func @scf_for_yield_only(
 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
@@ -52,8 +51,7 @@ func.func @scf_for_is_reading(%A : tensor<?xf32>, %B : tensor<?xf32>,
 
   // CHECK: %[[alloc:.*]] = memref.alloc
   // CHECK: memref.copy %[[A]], %[[alloc]]
-  // CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]]
-  // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[clone]])
+  // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[alloc]])
   %0 = scf.for %iv = %lb to %ub step %c1 iter_args(%1 = %A) -> tensor<?xf32> {
     %r = linalg.fill ins(%cst : f32) outs(%1 : tensor<?xf32>) -> tensor<?xf32>
     scf.yield %B : tensor<?xf32>
@@ -235,7 +233,6 @@ func.func @scf_if_non_equiv_yields(
 // CHECK-LABEL: func @scf_execute_region_yield_non_equivalent(
 //       CHECK:   %[[alloc:.*]] = memref.alloc(%{{.*}})
 //       CHECK:   %[[r:.*]] = memref.load %[[alloc]][%{{.*}}]
-//       CHECK:   memref.dealloc %[[alloc]]
 //       CHECK:   return %[[r]]
 func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32 {
   %r = scf.execute_region -> (tensor<?xf32>) {
@@ -256,16 +253,11 @@ func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32
 //  CHECK-SAME:     %[[t:.*]]: memref<?xf32
 //       CHECK:   %[[alloc:.*]] = memref.alloc(%{{.*}})
 //       CHECK:   memref.copy %[[t]], %[[alloc]]
-//       CHECK:   %[[cloned:.*]] = bufferization.clone %[[t]]
-//       CHECK:   %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[cloned]])
-//   CHECK-DAG:     memref.dealloc %[[iter]]
+//       CHECK:   %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[t]])
 //   CHECK-DAG:     %[[alloc2:.*]] = memref.alloc(%{{.*}})
 //       CHECK:     memref.copy %[[alloc]], %[[alloc2]]
 //       CHECK:     %[[alloc2_casted:.*]] = memref.cast %[[alloc2]]
-//       CHECK:     %[[cloned2:.*]] = bufferization.clone %[[alloc2_casted]]
-//       CHECK:     memref.dealloc %[[alloc2]]
-//       CHECK:     scf.yield %[[cloned2]]
-//       CHECK:   memref.dealloc %[[alloc]]
+//       CHECK:     scf.yield %[[alloc2_casted]]
 //       CHECK:   return %[[for]]
 func.func @scf_for_yield_non_equivalent(
     %t: tensor<?xf32>, %lb : index, %ub : index, %step : index) -> tensor<?xf32> {
@@ -284,19 +276,14 @@ func.func @scf_for_yield_non_equivalent(
 
 // CHECK-LABEL: func @scf_for_yield_allocation(
 //  CHECK-SAME:     %[[t:.*]]: memref<?xf32
-//       CHECK:   %[[cloned:.*]] = bufferization.clone %[[t]]
-//       CHECK:   %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[cloned]])
+//       CHECK:   %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[t]])
 // This alloc is for the bufferization.alloc_tensor.
 //   CHECK-DAG:     %[[alloc2:.*]] = memref.alloc(%{{.*}})
-//   CHECK-DAG:     memref.dealloc %[[iter]]
 // This alloc is for the scf.yield.
 //       CHECK:     %[[alloc3:.*]] = memref.alloc(%{{.*}})
 //       CHECK:     memref.copy %[[alloc2]], %[[alloc3]]
-//       CHECK:     memref.dealloc %[[alloc2]]
 //       CHECK:     %[[casted3:.*]] = memref.cast %[[alloc3]]
-//       CHECK:     %[[cloned3:.*]] = bufferization.clone %[[casted3]]
-//       CHECK:     memref.dealloc %[[alloc3]]
-//       CHECK:     scf.yield %[[cloned3]]
+//       CHECK:     scf.yield %[[casted3]]
 //       CHECK:   return %[[for]]
 func.func @scf_for_yield_allocation(%t: tensor<?xf32>, %lb : index, %ub : index,
                                %step : index) -> tensor<?xf32> {
@@ -320,9 +307,7 @@ func.func @scf_for_swapping_yields(
     %C : tensor<4xf32>, %lb : index, %ub : index, %step : index)
   -> (f32, f32)
 {
-//   CHECK-DAG:   %[[clone1:.*]] = bufferization.clone %[[A]]
-//   CHECK-DAG:   %[[clone2:.*]] = bufferization.clone %[[B]]
-//       CHECK:   %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[clone1]], %[[iter2:.*]] = %[[clone2]])
+//       CHECK:   %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A]], %[[iter2:.*]] = %[[B]])
   %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
       -> (tensor<?xf32>, tensor<?xf32>)
   {
@@ -335,25 +320,17 @@ func.func @scf_for_swapping_yields(
 
 //       CHECK:     %[[alloc2:.*]] = memref.alloc(%{{.*}})
 //       CHECK:     memref.copy %[[iter2]], %[[alloc2]]
-//       CHECK:     memref.dealloc %[[iter2]]
 //       CHECK:     %[[alloc1:.*]] = memref.alloc(%{{.*}})
 //       CHECK:     memref.copy %[[iter1]], %[[alloc1]]
-//       CHECK:     memref.dealloc %[[iter1]]
 //       CHECK:     %[[casted2:.*]] = memref.cast %[[alloc2]]
 //       CHECK:     %[[casted1:.*]] = memref.cast %[[alloc1]]
-//       CHECK:     %[[cloned1:.*]] = bufferization.clone %[[casted1]]
-//       CHECK:     memref.dealloc %[[alloc1]]
-//       CHECK:     %[[cloned2:.*]] = bufferization.clone %[[casted2]]
-//       CHECK:     memref.dealloc %[[alloc2]]
-//       CHECK:     scf.yield %[[cloned2]], %[[cloned1]]
+//       CHECK:     scf.yield %[[casted2]], %[[casted1]]
     // Yield tensors in different order.
     scf.yield %ttB, %ttA : tensor<?xf32>, tensor<?xf32>
   }
 
 //       CHECK:     %[[r0:.*]] = memref.load %[[for]]#0
-//       CHECK:     memref.dealloc %[[for]]#0
 //       CHECK:     %[[r1:.*]] = memref.load %[[for]]#1
-//       CHECK:     memref.dealloc %[[for]]#1
   %f0 = tensor.extract %r0#0[%step] : tensor<?xf32>
   %f1 = tensor.extract %r0#1[%step] : tensor<?xf32>
 //       CHECK:     return %[[r0]], %[[r1]]
@@ -399,23 +376,15 @@ func.func @scf_while_non_equiv_condition(%arg0: tensor<5xi1>,
                                          %idx: index)
   -> (tensor<5xi1>, tensor<5xi1>)
 {
-  // CHECK: %[[clone1:.*]] = bufferization.clone %[[arg1]]
-  // CHECK: %[[clone0:.*]] = bufferization.clone %[[arg0]]
-  // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[clone0]], %[[w1:.*]] = %[[clone1]]) {{.*}} {
+  // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[arg0]], %[[w1:.*]] = %[[arg1]]) {{.*}} {
   %r0, %r1 = scf.while (%w0 = %arg0, %w1 = %arg1)
       : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) {
     // CHECK: %[[condition:.*]] = memref.load %[[w0]]
     // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1>
     // CHECK: memref.copy %[[w1]], %[[a1]]
-    // CHECK: memref.dealloc %[[w1]]
     // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1>
     // CHECK: memref.copy %[[w0]], %[[a0]]
-    // CHECK: memref.dealloc %[[w0]]
-    // CHECK: %[[cloned1:.*]] = bufferization.clone %[[a1]]
-    // CHECK: memref.dealloc %[[a1]]
-    // CHECK: %[[cloned0:.*]] = bufferization.clone %[[a0]]
-    // CHECK: memref.dealloc %[[a0]]
-    // CHECK: scf.condition(%[[condition]]) %[[cloned1]], %[[cloned0]]
+    // CHECK: scf.condition(%[[condition]]) %[[a1]], %[[a0]]
     %condition = tensor.extract %w0[%idx] : tensor<5xi1>
     scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1>
   } do {
@@ -425,11 +394,7 @@ func.func @scf_while_non_equiv_condition(%arg0: tensor<5xi1>,
     // CHECK: memref.store %{{.*}}, %[[b0]]
     // CHECK: %[[casted0:.*]] = memref.cast %[[b0]] : memref<5xi1> to memref<5xi1, strided{{.*}}>
     // CHECK: %[[casted1:.*]] = memref.cast %[[b1]] : memref<5xi1> to memref<5xi1, strided{{.*}}>
-    // CHECK: %[[cloned2:.*]] = bufferization.clone %[[casted1]]
-    // CHECK: memref.dealloc %[[b1]]
-    // CHECK: %[[cloned3:.*]] = bufferization.clone %[[casted0]]
-    // CHECK: memref.dealloc %[[b0]]
-    // CHECK: scf.yield %[[cloned3]], %[[cloned2]]
+    // CHECK: scf.yield %[[casted0]], %[[casted1]]
     // CHECK: }
     %pos = "dummy.some_op"() : () -> (index)
     %val = "dummy.another_op"() : () -> (i1)
@@ -452,23 +417,15 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>,
                                                   %idx: index)
   -> (tensor<5xi1>, tensor<5xi1>)
 {
-  // CHECK-DAG: %[[clone1:.*]] = bufferization.clone %[[arg1]]
-  // CHECK-DAG: %[[clone0:.*]] = bufferization.clone %[[arg0]]
-  // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[clone0]], %[[w1:.*]] = %[[clone1]]) {{.*}} {
+  // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[arg0]], %[[w1:.*]] = %[[arg1]]) {{.*}} {
   %r0, %r1 = scf.while (%w0 = %arg0, %w1 = %arg1)
       : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) {
     // CHECK: %[[condition:.*]] = memref.load %[[w0]]
     // CHECK: %[[a1:.*]] = memref.alloc() {{.*}} : memref<5xi1>
     // CHECK: memref.copy %[[w1]], %[[a1]]
-    // CHECK: memref.dealloc %[[w1]]
     // CHECK: %[[a0:.*]] = memref.alloc() {{.*}} : memref<5xi1>
     // CHECK: memref.copy %[[w0]], %[[a0]]
-    // CHECK: memref.dealloc %[[w0]]
-    // CHECK: %[[cloned1:.*]] = bufferization.clone %[[a1]]
-    // CHECK: memref.dealloc %[[a1]]
-    // CHECK: %[[cloned0:.*]] = bufferization.clone %[[a0]]
-    // CHECK: memref.dealloc %[[a0]]
-    // CHECK: scf.condition(%[[condition]]) %[[cloned1]], %[[cloned0]]
+    // CHECK: scf.condition(%[[condition]]) %[[a1]], %[[a0]]
     %condition = tensor.extract %w0[%idx] : tensor<5xi1>
     scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1>
   } do {
@@ -478,11 +435,7 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>,
     // CHECK: memref.store %{{.*}}, %[[b0]]
     // CHECK: %[[casted1:.*]] = memref.cast %[[b1]]
     // CHECK: %[[casted0:.*]] = memref.cast %[[b0]]
-    // CHECK: %[[cloned1:.*]] = bufferization.clone %[[casted1]]
-    // CHECK: memref.dealloc %[[b1]]
-    // CHECK: %[[cloned0:.*]] = bufferization.clone %[[casted0]]
-    // CHECK: memref.dealloc %[[b0]]
-    // CHECK: scf.yield %[[cloned1]], %[[cloned0]]
+    // CHECK: scf.yield %[[casted1]], %[[casted0]]
     // CHECK: }
     %pos = "dummy.some_op"() : () -> (index)
     %val = "dummy.another_op"() : () -> (i1)
@@ -498,9 +451,7 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>,
 
 // CHECK-LABEL: func @scf_while_iter_arg_result_mismatch(
 //  CHECK-SAME:     %[[arg0:.*]]: memref<5xi1, strided{{.*}}>, %[[arg1:.*]]: memref<5xi1, strided{{.*}}>
-//       CHECK:   %[[clone:.*]] = bufferization.clone %[[arg1]]
-//       CHECK:   scf.while (%[[arg3:.*]] = %[[clone]]) : (memref<5xi1, strided{{.*}}) -> () {
-//   CHECK-DAG:     memref.dealloc %[[arg3]]
+//       CHECK:   scf.while (%[[arg3:.*]] = %[[arg1]]) : (memref<5xi1, strided{{.*}}) -> () {
 //   CHECK-DAG:     %[[load:.*]] = memref.load %[[arg0]]
 //       CHECK:     scf.condition(%[[load]])
 //       CHECK:   } do {
@@ -508,9 +459,7 @@ func.func @scf_while_non_equiv_condition_and_body(%arg0: tensor<5xi1>,
 //       CHECK:     memref.copy %[[arg0]], %[[alloc2]]
 //       CHECK:     memref.store %{{.*}}, %[[alloc2]]
 //       CHECK:     %[[casted:.*]] = memref.cast %[[alloc2]] : memref<5xi1> to memref<5xi1, strided{{.*}}>
-//       CHECK:     %[[cloned:.*]] = bufferization.clone %[[casted]]
-//       CHECK:     memref.dealloc %[[alloc2]]
-//       CHECK:     scf.yield %[[cloned]]
+//       CHECK:     scf.yield %[[casted]]
 //       CHECK:   }
 func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>,
                                               %arg1: tensor<5xi1>,
@@ -609,7 +558,6 @@ func.func @parallel_insert_slice_with_conflict(
 
   // CHECK: %[[load:.*]] = memref.load %[[arg2]]
   // CHECK: %[[load2:.*]] = memref.load %[[alloc1]]
-  // CHECK: memref.dealloc %[[alloc1]]
   %f = tensor.extract %arg2[%c0] : tensor<?xf32>
   %f2 = tensor.extract %2[%c0] : tensor<?xf32>
 
@@ -721,15 +669,12 @@ func.func @scf_if_memory_space(%c: i1, %f: f32, %cst: f32) -> (f32, f32)
   %filled = linalg.fill ins(%cst : f32) outs(%alloc : tensor<5xf32>) -> tensor<5xf32>
   // CHECK: scf.if %{{.*}} -> (memref<5xf32, 1>) {
   %1 = scf.if %c -> tensor<5xf32> {
-    // CHECK: %[[cloned:.*]] = bufferization.clone %[[alloc]]
-    // CHECK: scf.yield %[[cloned]]
+    // CHECK: scf.yield %[[alloc]]
     scf.yield %filled : tensor<5xf32>
   } else {
     // CHECK: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
     // CHECK: memref.store %{{.*}}, %[[alloc2]]
-    // CHECK: %[[cloned2:.*]] = bufferization.clone %[[alloc2]]
-    // CHECK: memref.dealloc %[[alloc2]]
-    // CHECK: scf.yield %[[cloned2]]
+    // CHECK: scf.yield %[[alloc2]]
     %2 = tensor.insert %f into %filled[%c0] : tensor<5xf32>
     scf.yield %2 : tensor<5xf32>
   }
@@ -744,7 +689,6 @@ func.func @scf_if_memory_space(%c: i1, %f: f32, %cst: f32) -> (f32, f32)
 // CHECK: memref.alloc() {{.*}} : memref<5xf32, 1>
 // CHECK: memref.store
 // CHECK: memref.load
-// CHECK: memref.dealloc
 func.func @scf_execute_region_memory_space(%f: f32) -> f32 {
   %c0 = arith.constant 0 : index
   %0 = scf.execute_region -> tensor<5xf32> {
@@ -955,24 +899,24 @@ func.func @regression_cast_in_loop() -> tensor<2xindex> {
 // This test does not compute anything meaningful but it tests that
 // bufferizesToMemoryWrite is correctly propagated through regions.
 
-// CHECK-NO-DEALLOC-PASS-LABEL: func @elide_copy_of_non_writing_scf_if(
+// CHECK-LABEL: func @elide_copy_of_non_writing_scf_if(
 func.func @elide_copy_of_non_writing_scf_if(%c: i1, %p1: index, %p2: index, %f: f32)
   -> (tensor<10xf32>, f32)
 {
   %r = scf.if %c -> tensor<10xf32> {
-    // CHECK-NO-DEALLOC-PASS: memref.alloc
+    // CHECK: memref.alloc
     %t1 = bufferization.alloc_tensor() : tensor<10xf32>
     scf.yield %t1 : tensor<10xf32>
   } else {
-    // CHECK-NO-DEALLOC-PASS: memref.alloc
+    // CHECK: memref.alloc
     %t2 = bufferization.alloc_tensor() : tensor<10xf32>
     scf.yield %t2 : tensor<10xf32>
   }
 
   // No copy should be inserted because %r does not bufferize to a memory write.
   // I.e., %r does not have defined contents and the copy can be elided.
-  // CHECK-NO-DEALLOC-PASS-NOT: memref.alloc
-  // CHECK-NO-DEALLOC-PASS-NOT: memref.copy
+  // CHECK-NOT: memref.alloc
+  // CHECK-NOT: memref.copy
   %r2 = tensor.insert %f into %r[%p1] : tensor<10xf32>
   %r3 = tensor.extract %r[%p2] : tensor<10xf32>
   return %r2, %r3 : tensor<10xf32>, f32
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
index 1580778167321ed..43e423d4c3e8e14 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-collapse-tensor.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation  \
-// RUN: -expand-strided-metadata -lower-affine \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \
 // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
index 8331757000ba487..84dad567ced3ffb 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-elementwise.mlir
@@ -1,8 +1,8 @@
 // RUN: mlir-opt %s -convert-elementwise-to-linalg \
-// RUN: -arith-bufferize -linalg-bufferize -tensor-bufferize \
-// RUN: -func-bufferize -buffer-deallocation -convert-linalg-to-loops \
-// RUN:  --finalize-memref-to-llvm -convert-func-to-llvm \
-// RUN: -reconcile-unrealized-casts | \
+// RUN: -arith-bufferize -linalg-bufferize -tensor-bufferize -func-bufferize \
+// RUN: -canonicalize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \
+// RUN: -convert-scf-to-cf -convert-arith-to-llvm -convert-cf-to-llvm --finalize-memref-to-llvm \
+// RUN: -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
 // RUN: | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
index b7084a30654ab62..a101b76ef186b5e 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-expand-tensor.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation  \
-// RUN: -expand-strided-metadata -lower-affine \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -convert-scf-to-cf -expand-strided-metadata -lower-affine -convert-cf-to-llvm -convert-arith-to-llvm \
 // RUN: -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
index 967c4d58e71b4f2..54a2bbf8d46809d 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -1,8 +1,9 @@
 // RUN: mlir-opt %s -test-linalg-transform-patterns=test-linalg-to-vector-patterns \
 // RUN: -empty-tensor-to-alloc-tensor -linalg-bufferize -arith-bufferize \
 // RUN: -bufferization-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation \
-// RUN: -convert-linalg-to-loops -convert-scf-to-cf  -expand-strided-metadata -lower-affine -convert-arith-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \
+// RUN: -lower-affine -convert-arith-to-llvm -finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils \
 // RUN: | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
index 18dd8bf183cb90a..98fce6c020c03dd 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert-multiple-uses.mlir
@@ -1,7 +1,9 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation \
-// RUN: -convert-linalg-to-loops -convert-scf-to-cf  -expand-strided-metadata -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata  \
+// RUN: -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm \
+// RUN: -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils \
 // RUN: | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
index 8002db794a40172..cf7d0c762ea36f0 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-subtensor-insert.mlir
@@ -1,7 +1,9 @@
 // RUN: mlir-opt %s -linalg-bufferize \
 // RUN: -arith-bufferize -tensor-bufferize -func-bufferize \
-// RUN: -finalizing-bufferize -buffer-deallocation \
-// RUN: -convert-linalg-to-loops -convert-scf-to-cf  -expand-strided-metadata -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref \
+// RUN: -convert-linalg-to-loops -convert-scf-to-cf -expand-strided-metadata \
+// RUN: -lower-affine -convert-arith-to-llvm --finalize-memref-to-llvm \
+// RUN: -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
 // RUN: | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
index 715d62137f342c6..38b49cd444df3c1 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-e2e.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -arith-bufferize -linalg-bufferize \
-// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops \
-// RUN:  --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops \
+// RUN: -convert-arith-to-llvm -convert-scf-to-cf -convert-cf-to-llvm --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_runner_utils \
 // RUN: | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
index e1b2e247d0619dd..ee503810de148b6 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -1,7 +1,7 @@
 // UNSUPPORTED: asan
 // RUN: mlir-opt %s -test-transform-dialect-erase-schedule -linalg-bufferize -arith-bufferize \
-// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops -convert-scf-to-cf \
-// RUN:  -expand-strided-metadata -lower-affine -convert-arith-to-llvm -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation-pipeline -convert-bufferization-to-memref -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: -expand-strided-metadata -lower-affine -convert-arith-to-llvm -convert-scf-to-cf --finalize-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e main -entry-point-result=void \
 // RUN:   -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils \
 // RUN: | FileCheck %s

From 63c5836a7c45d882cb29f40de83cbc00ddaf21d7 Mon Sep 17 00:00:00 2001
From: Martin Erhart <merhart at google.com>
Date: Mon, 18 Sep 2023 08:54:35 +0000
Subject: [PATCH 2/2] [mlir][bufferization] Remove allow-return-allocs and
 create-deallocs pass options, remove bufferization.escape attribute

This is the first commit in a series whose goal is to rework the
BufferDeallocation pass. Currently, this pass relies heavily on copies
to perform correct deallocations, which leads to very slow code and
potentially high memory usage. Additionally, there are unsupported
cases, such as returning memrefs, which this series of commits also
aims to support.

This first commit removes the deallocation capabilities of
one-shot bufferization. One-shot bufferization should never deallocate
any memrefs; going forward, this is handled entirely by the
buffer-deallocation pass. As a consequence, the allow-return-allocs
pass option now effectively defaults to true and create-deallocs to
false, and both options, as well as the escape attribute indicating
whether a memref escapes the current region, are removed.

The documentation is also updated in this commit to reflect these pass
option changes.

Already reviewed in https://reviews.llvm.org/D156662
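
For downstream users of the removed pass options, the migration is
roughly as follows (an illustrative sketch, not taken from the patch;
input.mlir is a placeholder):

```
# Before this commit: yielding new allocations had to be enabled
# explicitly, and deallocation could be disabled with create-deallocs=0.
mlir-opt input.mlir -one-shot-bufferize="allow-return-allocs create-deallocs=0"

# After this commit: both options are gone. Yielding new allocations is
# always allowed and no deallocations are ever created; only loops that
# yield new allocations need the temporary workaround option.
mlir-opt input.mlir -one-shot-bufferize="allow-return-allocs-from-loops"
```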
---
 mlir/docs/Bufferization.md                    |  46 ++----
 .../IR/BufferizableOpInterface.h              |  16 +-
 .../Bufferization/IR/BufferizationBase.td     |  10 --
 .../TransformOps/BufferizationTransformOps.td |   3 +-
 .../Transforms/OneShotAnalysis.h              |   6 +-
 .../Bufferization/Transforms/Passes.td        |  23 +--
 .../Dialect/SparseTensor/Pipelines/Passes.h   |   3 +-
 .../IR/BufferizableOpInterface.cpp            |  61 +-------
 .../Bufferization/IR/BufferizationDialect.cpp |  35 -----
 .../Bufferization/IR/BufferizationOps.cpp     |  11 --
 .../BufferizationTransformOps.cpp             |   5 +-
 .../Bufferization/Transforms/Bufferize.cpp    |  13 +-
 .../Transforms/EmptyTensorElimination.cpp     |   2 +-
 .../FuncBufferizableOpInterfaceImpl.cpp       |   8 +-
 .../Transforms/OneShotAnalysis.cpp            |  78 ----------
 .../Transforms/TensorCopyInsertion.cpp        |  21 ---
 .../BufferizableOpInterfaceImpl.cpp           |   7 +-
 .../TransformOps/LinalgTransformOps.cpp       |   2 +-
 .../BufferizableOpInterfaceImpl.cpp           |  23 +--
 .../Transforms/SparseTensorConversion.cpp     |   6 -
 .../Transforms/SparseTensorRewriting.cpp      |   8 -
 .../SparsificationAndBufferizationPass.cpp    |   3 -
 .../BufferizableOpInterfaceImpl.cpp           |  40 ++---
 .../_bufferization_transform_ops_ext.py       |  12 +-
 .../Dialect/Arith/one-shot-bufferize.mlir     |  10 +-
 ...ne-shot-bufferize-allow-return-allocs.mlir |   8 +-
 ...ize-analysis-empty-tensor-elimination.mlir |   2 +-
 .../Transforms/one-shot-bufferize-compat.mlir |  22 +--
 ...ot-bufferize-empty-tensor-elimination.mlir |   2 +-
 .../one-shot-bufferize-partial.mlir           |  16 +-
 .../one-shot-bufferize-pass-statistics.mlir   |   1 -
 .../Transforms/one-shot-bufferize.mlir        |   6 -
 ...-module-bufferize-allow-return-allocs.mlir |  12 +-
 .../one-shot-module-bufferize-analysis.mlir   |   8 +-
 .../one-shot-module-bufferize-invalid.mlir    | 147 ------------------
 .../one-shot-module-bufferize-out-params.mlir |   6 +-
 .../Transforms/one-shot-module-bufferize.mlir |  21 +--
 .../tensor-copy-insertion-memory-space.mlir   |   6 +-
 .../Transforms/tensor-copy-insertion.mlir     |  27 ++--
 .../Transforms/transform-ops.mlir             |   3 -
 mlir/test/Dialect/Bufferization/invalid.mlir  |  32 ----
 .../one-shot-bufferize-analysis.mlir          |   2 +-
 .../one-shot-bufferize-invalid.mlir           |   2 +-
 .../ControlFlow/one-shot-bufferize.mlir       |   4 +-
 .../Linalg/one-shot-bufferize-analysis.mlir   |   2 +-
 .../Dialect/Linalg/one-shot-bufferize.mlir    |  12 +-
 ...erize-allow-return-allocs-no-deallocs.mlir |   5 +-
 .../SCF/one-shot-bufferize-analysis.mlir      |   8 +-
 .../SCF/one-shot-bufferize-invalid.mlir       |   2 +-
 ...-shot-bufferize-tensor-copy-insertion.mlir |  30 ++--
 mlir/test/Dialect/SCF/one-shot-bufferize.mlir |  10 +-
 ..._shot_bufferize_tensor_copy_insertion.mlir |  24 +--
 .../Dialect/SparseTensor/sparse_sddmm.mlir    |  12 +-
 ...-shot-bufferize-tensor-copy-insertion.mlir |   6 +-
 .../Dialect/Tensor/one-shot-bufferize.mlir    |  48 +-----
 .../Linalg/CPU/test-one-shot-bufferize.mlir   |   2 +-
 .../Bufferization/TestTensorCopyInsertion.cpp |  13 +-
 .../dialects/transform_bufferization_ext.py   |   6 +-
 58 files changed, 181 insertions(+), 778 deletions(-)

diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
index 09bec06743c7a65..d9d0751cae8c9dd 100644
--- a/mlir/docs/Bufferization.md
+++ b/mlir/docs/Bufferization.md
@@ -266,42 +266,16 @@ must be inserted due to a RaW conflict. E.g.:
 In the above example, a buffer copy of buffer(`%another_tensor`) (with `%cst`
 inserted) is yielded from the "then" branch.
 
-In both examples, a buffer is allocated inside of a block and then yielded from
-the block. Deallocation of such buffers is tricky and not currently implemented
-in an efficient way. For this reason, One-Shot Bufferize must be explicitly
-configured with `allow-return-allocs` to support such IR.
-
-When running with `allow-return-allocs`, One-Shot Bufferize may introduce
-allocations that cannot be deallocated by One-Shot Bufferize yet. For that
-reason, `-buffer-deallocation` must be run after One-Shot Bufferize. This buffer
-deallocation pass resolves yields of newly allocated buffers with copies. E.g.,
-the `scf.if` example above would bufferize to IR similar to the following:
-
-```mlir
-%0 = scf.if %c -> (memref<?xf32>) {
-  %1 = memref.alloc(...) : memref<?xf32>
-  ...
-  scf.yield %1 : memref<?xf32>
-} else {
-  %2 = memref.alloc(...) : memref<?xf32>
-  memref.copy %another_memref, %2
-  scf.yield %2 : memref<?xf32>
-}
-```
-
-In the bufferized IR, both branches return a newly allocated buffer, so it does
-not matter which if-branch was taken. In both cases, the resulting buffer `%0`
-must be deallocated at some point after the `scf.if` (unless the `%0` is
-returned/yielded from its block).
-
-Note: Buffer allocations that are returned from a function are not deallocated,
-not even with `-buffer-deallocation`. It is the caller's responsibility to
-deallocate the buffer. In the future, this could be automated with allocation
-hoisting (across function boundaries) or reference counting.
-
-One-Shot Bufferize can be configured to leak all memory and not generate any
-buffer deallocations with `create-deallocs=0`. This can be useful for
-compatibility with legacy code that has its own method of deallocating buffers.
+Note: Buffer allocations that are returned from a function are not deallocated.
+It is the caller's responsibility to deallocate the buffer. For the full
+function boundary ABI for MemRefs w.r.t. buffer deallocation refer to the
+[*Function Boundary ABI*](#function-boundary-abi) section. In the future, this
+could be automated with allocation hoisting (across function boundaries) or
+reference counting.
+
+One-Shot Bufferize leaks all memory and does not generate any buffer
+deallocations. The `-buffer-deallocation-pipeline` has to be run afterwards to
+insert the deallocation operations.
 
 ## Ownership-based Buffer Deallocation
 
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 9ec44dfd16a0c00..1c715f8b9a53ef3 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -361,10 +361,6 @@ struct BufferizationOptions {
   /// used.
   UnknownTypeConverterFn unknownTypeConverterFn = nullptr;
 
-  /// Specifies whether dealloc ops should be generated along with alloc ops. If
-  /// not, new memory allocations will leak.
-  bool createDeallocs = true;
-
   /// Seed for the analysis fuzzer. If set to `0`, the fuzzer is deactivated.
   /// Should be used only with `testAnalysisOnly = true`.
   unsigned analysisFuzzerSeed = 0;
@@ -588,13 +584,9 @@ class AnalysisState {
 /// undefined contents is allocated.
 FailureOr<Value>
 allocateTensorForShapedValue(OpBuilder &b, Location loc, Value shapedValue,
-                             bool escape, const BufferizationOptions &options,
+                             const BufferizationOptions &options,
                              bool copy = true);
 
-/// Return `true` if the allocation of the given op is guaranteed to not escape
-/// the containing block.
-bool allocationDoesNotEscape(OpResult opResult);
-
 /// Lookup the buffer for the given value. If the value was not bufferized
 /// yet, wrap it in a ToMemrefOp. Otherwise, it is the result of a ToTensorOp,
 /// from which the memref operand is returned.
@@ -641,12 +633,6 @@ OpTy replaceOpWithNewBufferizedOp(RewriterBase &rewriter, Operation *op,
   return newOp;
 }
 
-/// Return `true` if the buffer of given OpResult should be deallocated. This
-/// function should be called during `BufferizableOpInterface::bufferize`
-/// implementations that allocate a new buffer for the given OpResult.
-bool shouldDeallocateOpResult(OpResult opResult,
-                              const BufferizationOptions &options);
-
 /// Return a MemRefType to which the type of the given value can be bufferized.
 ///
 /// If possible, op bufferization implementations should not use this function
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td
index e9c140859344ef8..0d509e69349e918 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationBase.td
@@ -60,16 +60,6 @@ def Bufferization_Dialect : Dialect {
     /// arguments during One-Shot Module Bufferize.
     constexpr const static ::llvm::StringLiteral
         kBufferLayoutAttrName = "bufferization.buffer_layout";
-
-    /// Attribute name used to mark escaping behavior of buffer allocations.
-    /// Escaping allocations cannot be deallocated in the same block and must
-    /// be treated specially: They are currently deallocated with the
-    /// BufferDeallocation pass.
-    ///
-    /// Note: Only ops with at least one OpResult that bufferizes to a buffer
-    /// allocation (as per BufferizableOpInterface) may have this attribute.
-    constexpr const static ::llvm::StringLiteral
-        kEscapeAttrName = "bufferization.escape";
   }];
   let hasOperationAttrVerify = 1;
 }
diff --git a/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td b/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td
index 84bd047e6d51eed..a0eb5ff00cb9fea 100644
--- a/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.td
@@ -82,10 +82,9 @@ def OneShotBufferizeOp
   let arguments = (
       ins TransformHandleTypeInterface:$target,
       OptionalAttr<LayoutMapOption>:$function_boundary_type_conversion,
-      DefaultValuedAttr<BoolAttr, "false">:$allow_return_allocs,
+      DefaultValuedAttr<BoolAttr, "false">:$allow_return_allocs_from_loops,
       DefaultValuedAttr<BoolAttr, "false">:$allow_unknown_ops,
       DefaultValuedAttr<BoolAttr, "false">:$bufferize_function_boundaries,
-      DefaultValuedAttr<BoolAttr, "true">:$create_deallocs,
       DefaultValuedAttr<BoolAttr, "false">:$test_analysis_only,
       DefaultValuedAttr<BoolAttr, "false">:$print_conflicts,
       DefaultValuedAttr<StrAttr, "\"memref.copy\"">:$memcpy_op);
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
index 585c7ca92c71895..328aff07280a92b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h
@@ -28,9 +28,9 @@ struct OneShotBufferizationOptions : public BufferizationOptions {
 
   OneShotBufferizationOptions() = default;
 
-  /// Specifies whether returning newly allocated memrefs should be allowed.
-  /// Otherwise, a pass failure is triggered.
-  bool allowReturnAllocs = false;
+  /// Specifies whether returning newly allocated memrefs from loops should be
+  /// allowed.  Otherwise, a pass failure is triggered.
+  bool allowReturnAllocsFromLoops = false;
 
   /// Specifies whether the tensor IR should be annotated with alias sets.
   bool dumpAliasSets = false;
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index f3c2a29c0589f29..62383e376f6f7a3 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -387,15 +387,9 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
     example, `tensor.generate` is not in destination-passing style and always
     results in a new buffer allocation.
 
-    One-Shot Bufferize deallocates all buffers that it allocates. Yielding newly
-    allocated buffers from a block can lead to bad performance because
-    additional buffer copies are often needed to make sure that every buffer
-    allocation is also deallocated again. By default, such IR is rejected by
-    One-Shot Bufferize. Such IR can be allowed with `allow-return-allocs`. In
-    that case, the `-buffer-deallocation` pass should be run after One-Shot
-    Bufferize. Note that new buffer allocations that are returned from a
-    function can currently not be deallocated by `-buffer-deallocation` and
-    leak.
+    One-Shot Bufferize does not deallocate any buffers that it allocates. The
+    `-buffer-deallocation` pass should be run after One-Shot Bufferize to insert
+    the deallocation operations necessary to eliminate memory leaks.
 
     One-Shot Bufferize will by default reject IR that contains non-bufferizable
     op, i.e., ops that do not implemement BufferizableOpInterface. Such IR can
@@ -462,9 +456,9 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
     `test-analysis-only`.
   }];
   let options = [
-    Option<"allowReturnAllocs", "allow-return-allocs", "bool",
-            /*default=*/"false",
-           "Allows returning/yielding new allocations from a block.">,
+    Option<"allowReturnAllocsFromLoops", "allow-return-allocs-from-loops",
+           "bool", /*default=*/"false",
+           "Allows returning/yielding new allocations from a loop.">,
     Option<"allowUnknownOps", "allow-unknown-ops", "bool",
            /*default=*/"false",
            "Allows unknown (not bufferizable) ops in the input IR.">,
@@ -479,9 +473,6 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
            "Bufferize function boundaries (experimental).">,
     Option<"copyBeforeWrite", "copy-before-write", "bool", /*default=*/"false",
            "Skip the analysis. Make a buffer copy on every write.">,
-    Option<"createDeallocs", "create-deallocs", "bool", /*default=*/"true",
-           "Specify if buffers should be deallocated. For compatibility with "
-           "core bufferization passes.">,
     ListOption<"dialectFilter", "dialect-filter", "std::string",
                "Restrict bufferization to ops from these dialects.">,
     Option<"dumpAliasSets", "dump-alias-sets", "bool", /*default=*/"false",
@@ -513,8 +504,6 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
   let statistics = [
     Statistic<"numBufferAlloc", "num-buffer-alloc",
               "Number of buffer allocations">,
-    Statistic<"numBufferDealloc", "num-buffer-dealloc",
-              "Number of buffer deallocations">,
     Statistic<"numTensorInPlace", "num-tensor-in-place",
               "Number of in-place tensor OpOperands">,
     Statistic<"numTensorOutOfPlace", "num-tensor-out-of-place",
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
index 71fcfc84919eb5c..c88963d399c4c92 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
@@ -93,8 +93,7 @@ struct SparseCompilerOptions
       desc("Specify if the temporary buffers created by the sparse "
            "compiler should be deallocated. For compatibility with core "
            "bufferization passes. "
-           "This option is only used when enable-runtime-library=false. "
-           "See also create-deallocs for BufferizationOption."),
+           "This option is only used when enable-runtime-library=false."),
       init(true)};
 
   PassOptions::Option<int32_t> vectorLength{
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 2e549b0335688cb..57cd303d2076e73 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -140,27 +140,11 @@ Operation *bufferization::getOwnerOfValue(Value value) {
   return llvm::cast<BlockArgument>(value).getOwner()->getParentOp();
 }
 
-bool bufferization::allocationDoesNotEscape(OpResult opResult) {
-#ifndef NDEBUG
-  auto bufferizableOp = opResult.getDefiningOp<BufferizableOpInterface>();
-  assert(bufferizableOp && bufferizableOp.bufferizesToAllocation(opResult) &&
-         "expected op that bufferizes to an allocation");
-#endif // NDEBUG
-
-  Operation *op = opResult.getDefiningOp();
-  // If there is no 'escape' attribute, we cannot say for sure.
-  if (!op->hasAttr(BufferizationDialect::kEscapeAttrName))
-    return false;
-  auto attr =
-      op->getAttrOfType<ArrayAttr>(BufferizationDialect::kEscapeAttrName);
-  return !llvm::cast<BoolAttr>(attr[opResult.getResultNumber()]).getValue();
-}
-
 /// Create an AllocTensorOp for the given shaped value. If `copy` is set, the
 /// shaped value is copied. Otherwise, a tensor with undefined contents is
 /// allocated.
 FailureOr<Value> bufferization::allocateTensorForShapedValue(
-    OpBuilder &b, Location loc, Value shapedValue, bool escape,
+    OpBuilder &b, Location loc, Value shapedValue,
     const BufferizationOptions &options, bool copy) {
   Value tensor;
   if (llvm::isa<RankedTensorType>(shapedValue.getType())) {
@@ -202,8 +186,6 @@ FailureOr<Value> bufferization::allocateTensorForShapedValue(
   // Create AllocTensorOp.
   auto allocTensorOp = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
                                                copy ? tensor : Value());
-  allocTensorOp->setAttr(BufferizationDialect::kEscapeAttrName,
-                         b.getBoolArrayAttr({escape}));
 
   // Add 'memory_space' attribute. Not needed if 'copy' operand is specified.
   if (copy)
@@ -224,10 +206,8 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   Operation *op = getOperation();
   SmallVector<OpOperand *> outOfPlaceOpOperands;
   DenseSet<OpOperand *> copiedOpOperands;
-  DenseSet<OpOperand *> escapingOpOperandCopies;
   SmallVector<Value> outOfPlaceValues;
   DenseSet<Value> copiedOpValues;
-  DenseSet<Value> escapingValueCopies;
 
   // Find all out-of-place OpOperands.
   for (OpOperand &opOperand : op->getOpOperands()) {
@@ -243,11 +223,6 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
     // Is the result yielded from a block? Or are deallocations turned off
     // entirely? In either case, mark the allocation as "escaping", so that it
     // will not be deallocated.
-    bool escape = !state.getOptions().createDeallocs ||
-                  llvm::any_of(aliasingValues, [&](AliasingValue a) {
-                    return state.isTensorYielded(a.value);
-                  });
-
     if (aliasingValues.getNumAliases() == 1 &&
         isa<OpResult>(aliasingValues.getAliases()[0].value) &&
         !state.bufferizesToMemoryWrite(opOperand) &&
@@ -265,15 +240,11 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
       outOfPlaceValues.push_back(value);
       if (!state.canOmitTensorCopy(opOperand))
         copiedOpValues.insert(value);
-      if (escape)
-        escapingValueCopies.insert(value);
     } else {
       // In all other cases, make a copy of the OpOperand.
       outOfPlaceOpOperands.push_back(&opOperand);
       if (!state.canOmitTensorCopy(opOperand))
         copiedOpOperands.insert(&opOperand);
-      if (escape)
-        escapingOpOperandCopies.insert(&opOperand);
     }
   }
 
@@ -281,8 +252,7 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   rewriter.setInsertionPoint(op);
   for (OpOperand *opOperand : outOfPlaceOpOperands) {
     FailureOr<Value> copy = allocateTensorForShapedValue(
-        rewriter, op->getLoc(), opOperand->get(),
-        escapingOpOperandCopies.contains(opOperand), state.getOptions(),
+        rewriter, op->getLoc(), opOperand->get(), state.getOptions(),
         copiedOpOperands.contains(opOperand));
     if (failed(copy))
       return failure();
@@ -293,8 +263,8 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   rewriter.setInsertionPointAfter(op);
   for (Value value : outOfPlaceValues) {
     FailureOr<Value> copy = allocateTensorForShapedValue(
-        rewriter, op->getLoc(), value, escapingValueCopies.contains(value),
-        state.getOptions(), copiedOpValues.count(value));
+        rewriter, op->getLoc(), value, state.getOptions(),
+        copiedOpValues.count(value));
     if (failed(copy))
       return failure();
     SmallVector<OpOperand *> uses = llvm::to_vector(
@@ -314,29 +284,6 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
   return success();
 }
 
-bool bufferization::shouldDeallocateOpResult(
-    OpResult opResult, const BufferizationOptions &options) {
-  Operation *op = opResult.getOwner();
-  assert(options.dynCastBufferizableOp(op).bufferizesToAllocation(opResult) &&
-         "expected that op allocates");
-
-  AnalysisState analysisState(options);
-  if (op->hasAttr(BufferizationDialect::kEscapeAttrName)) {
-    // AllocTensorOp has one result.
-    ArrayAttr escapeAttr = llvm::cast<ArrayAttr>(
-        op->getAttr(BufferizationDialect::kEscapeAttrName));
-    return !llvm::cast<BoolAttr>(escapeAttr[0]).getValue();
-  }
-
-  // No "escape" annotation found.
-  if (options.createDeallocs) {
-    // Perform an ad-hoc analysis.
-    return !analysisState.isTensorYielded(opResult);
-  }
-
-  return false;
-}
-
 //===----------------------------------------------------------------------===//
 // OpFilter
 //===----------------------------------------------------------------------===//
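For context only, a minimal sketch (not part of the patch) of what a call site of the updated helper looks like, assuming `rewriter`, `op`, `value`, and `state` are in scope as in the call sites touched further down; the `escape` flag is gone and callers only decide whether the allocation is initialized via `copy`:

// Sketch, not part of the patch: the escape flag no longer exists, so the
// only remaining choice is whether the new tensor copies `value`.
FailureOr<Value> replacement = bufferization::allocateTensorForShapedValue(
    rewriter, op->getLoc(), value, state.getOptions(), /*copy=*/true);
if (failed(replacement))
  return failure();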
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
index 2805e9a43e446f7..802bd52269419b4 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp
@@ -28,9 +28,6 @@ constexpr const ::llvm::StringLiteral BufferizationDialect::kWritableAttrName;
 constexpr const ::llvm::StringLiteral
     BufferizationDialect::kBufferLayoutAttrName;
 
-/// Attribute name used to mark escaping behavior of buffer allocations.
-constexpr const ::llvm::StringLiteral BufferizationDialect::kEscapeAttrName;
-
 //===----------------------------------------------------------------------===//
 // Bufferization Dialect Interfaces
 //===----------------------------------------------------------------------===//
@@ -108,38 +105,6 @@ BufferizationDialect::verifyOperationAttribute(Operation *op,
                                                NamedAttribute attr) {
   using bufferization::BufferizableOpInterface;
 
-  if (attr.getName() == kEscapeAttrName) {
-    auto arrayAttr = llvm::dyn_cast<ArrayAttr>(attr.getValue());
-    if (!arrayAttr)
-      return op->emitError() << "'" << kEscapeAttrName
-                             << "' is expected to be a bool array attribute";
-    if (arrayAttr.size() != op->getNumResults())
-      return op->emitError()
-             << "'" << kEscapeAttrName
-             << "' has wrong number of elements, expected "
-             << op->getNumResults() << ", got " << arrayAttr.size();
-    auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op);
-    if (!bufferizableOp)
-      return op->emitError()
-             << "'" << kEscapeAttrName << "' only valid on bufferizable ops";
-    for (const auto &it : llvm::enumerate(arrayAttr)) {
-      auto attr = it.value();
-      auto boolAttr = llvm::dyn_cast<BoolAttr>(attr);
-      if (!boolAttr)
-        return op->emitError() << "'" << kEscapeAttrName
-                               << "' is expected to be a bool array attribute";
-      if (!boolAttr.getValue())
-        continue;
-      if (!llvm::isa<TensorType>(op->getResult(it.index()).getType()))
-        return op->emitError()
-               << "'" << kEscapeAttrName << "' only valid for tensor results";
-      if (!bufferizableOp.bufferizesToAllocation(op->getOpResult(it.index())))
-        return op->emitError() << "'" << kEscapeAttrName
-                               << "' only valid for allocation results";
-    }
-    return success();
-  }
-
   return op->emitError()
          << "attribute '" << attr.getName()
          << "' not supported as an op attribute by the bufferization dialect";
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index e5016c956804688..f267a5b74294aaa 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -187,20 +187,9 @@ LogicalResult AllocTensorOp::bufferize(RewriterBase &rewriter,
       return failure();
   }
 
-  // Should the buffer be deallocated?
-  bool dealloc =
-      shouldDeallocateOpResult(llvm::cast<OpResult>(getResult()), options);
-
   // Replace op.
   replaceOpWithBufferizedValues(rewriter, getOperation(), *alloc);
 
-  // Create buffer deallocation (if requested).
-  if (!dealloc)
-    return success();
-
-  rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
-  if (failed(options.createDealloc(rewriter, loc, *alloc)))
-    return failure();
   return success();
 }
 
diff --git a/mlir/lib/Dialect/Bufferization/TransformOps/BufferizationTransformOps.cpp b/mlir/lib/Dialect/Bufferization/TransformOps/BufferizationTransformOps.cpp
index b84cc452d0141cd..aa27615e92c419d 100644
--- a/mlir/lib/Dialect/Bufferization/TransformOps/BufferizationTransformOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/TransformOps/BufferizationTransformOps.cpp
@@ -55,10 +55,9 @@ transform::OneShotBufferizeOp::apply(transform::TransformRewriter &rewriter,
                                      TransformResults &transformResults,
                                      TransformState &state) {
   OneShotBufferizationOptions options;
-  options.allowReturnAllocs = getAllowReturnAllocs();
+  options.allowReturnAllocsFromLoops = getAllowReturnAllocsFromLoops();
   options.allowUnknownOps = getAllowUnknownOps();
   options.bufferizeFunctionBoundaries = getBufferizeFunctionBoundaries();
-  options.createDeallocs = getCreateDeallocs();
   options.testAnalysisOnly = getTestAnalysisOnly();
   options.printConflicts = getPrintConflicts();
   if (getFunctionBoundaryTypeConversion().has_value())
@@ -114,7 +113,7 @@ DiagnosedSilenceableFailure transform::EliminateEmptyTensorsOp::apply(
     transform::TransformRewriter &rewriter, TransformResults &transformResults,
     TransformState &state) {
   OneShotBufferizationOptions options;
-  options.allowReturnAllocs = true;
+  options.allowReturnAllocsFromLoops = true;
 
   for (Operation *target : state.getPayloadOps(getTarget())) {
     OneShotAnalysisState state(target, options);
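A short sketch of typical downstream setup code after this rename (an assumption about caller code, not taken from the patch): the old `allowReturnAllocs`/`createDeallocs` members are gone from `OneShotBufferizationOptions`, and returning allocations from loops is gated by `allowReturnAllocsFromLoops` instead. `op` here stands for the payload operation being analyzed, inside some pass or transform body:

// Sketch: configuring the options struct after this patch.
bufferization::OneShotBufferizationOptions options;
options.allowReturnAllocsFromLoops = true;   // replaces the old allowReturnAllocs
options.bufferizeFunctionBoundaries = true;  // unrelated knobs are unchanged
bufferization::OneShotAnalysisState state(op, options);
if (failed(bufferization::analyzeOp(op, state)))
  return failure();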
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 7358d0d465d3e3d..cad78b3e65b2313 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -21,7 +21,6 @@
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "mlir/Pass/PassManager.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/Passes.h"
 #include <optional>
 
@@ -204,12 +203,11 @@ struct OneShotBufferizePass
     if (!options) {
       // Make new bufferization options if none were provided when creating the
       // pass.
-      opt.allowReturnAllocs = allowReturnAllocs;
+      opt.allowReturnAllocsFromLoops = allowReturnAllocsFromLoops;
       opt.allowUnknownOps = allowUnknownOps;
       opt.analysisFuzzerSeed = analysisFuzzerSeed;
       opt.analysisHeuristic = parseHeuristicOption(analysisHeuristic);
       opt.copyBeforeWrite = copyBeforeWrite;
-      opt.createDeallocs = createDeallocs;
       opt.dumpAliasSets = dumpAliasSets;
       opt.setFunctionBoundaryTypeConversion(
           parseLayoutMapOption(functionBoundaryTypeConversion));
@@ -303,7 +301,6 @@ struct OneShotBufferizePass
 
     // Set pass statistics.
     this->numBufferAlloc = statistics.numBufferAlloc;
-    this->numBufferDealloc = statistics.numBufferDealloc;
     this->numTensorInPlace = statistics.numTensorInPlace;
     this->numTensorOutOfPlace = statistics.numTensorOutOfPlace;
   }
@@ -408,14 +405,11 @@ class BufferizationRewriter : public IRRewriter, public RewriterBase::Listener {
   void notifyOperationInserted(Operation *op) override {
     erasedOps.erase(op);
 
-    // Gather statistics about allocs and deallocs.
+    // Gather statistics about allocs.
     if (statistics) {
-      if (auto sideEffectingOp = dyn_cast<MemoryEffectOpInterface>(op)) {
+      if (auto sideEffectingOp = dyn_cast<MemoryEffectOpInterface>(op))
         statistics->numBufferAlloc += static_cast<int64_t>(
             sideEffectingOp.hasEffect<MemoryEffects::Allocate>());
-        statistics->numBufferDealloc += static_cast<int64_t>(
-            sideEffectingOp.hasEffect<MemoryEffects::Free>());
-      }
     }
 
     // Keep track of to_memref ops.
@@ -672,7 +666,6 @@ bufferization::bufferizeBlockSignature(Block *block, RewriterBase &rewriter,
 BufferizationOptions bufferization::getPartialBufferizationOptions() {
   BufferizationOptions options;
   options.allowUnknownOps = true;
-  options.createDeallocs = false;
   options.enforceAliasingInvariants = false;
   options.unknownTypeConverterFn = [](Value value, Attribute memorySpace,
                                       const BufferizationOptions &options) {
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
index 1662b52968d35b3..287949324b31d72 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
@@ -182,7 +182,7 @@ struct EmptyTensorElimination
 void EmptyTensorElimination::runOnOperation() {
   Operation *op = getOperation();
   OneShotBufferizationOptions options;
-  options.allowReturnAllocs = true;
+  options.allowReturnAllocsFromLoops = true;
   OneShotAnalysisState state(op, options);
   if (failed(analyzeOp(op, state))) {
     signalPassFailure();
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp
index 8141e554961995e..3a8c397c02a8096 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp
@@ -352,13 +352,7 @@ struct FuncOpInterface
     // TODO: func.func with multiple returns are not supported.
     if (!getAssumedUniqueReturnOp(funcOp) && !funcOp.isExternal())
       return op->emitOpError("op without unique func.return is not supported");
-    const auto &options =
-        static_cast<const OneShotBufferizationOptions &>(state.getOptions());
-    // allow-return-allocs is required for ops with multiple blocks.
-    if (options.allowReturnAllocs || funcOp.getRegion().getBlocks().size() <= 1)
-      return success();
-    return op->emitOpError(
-        "op cannot be bufferized without allow-return-allocs");
+    return success();
   }
 
   /// Rewrite function bbArgs and return values into buffer form. This function
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
index 56bb6204b030245..09205388a644720 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp
@@ -1307,82 +1307,6 @@ static void annotateOpsWithAliasSets(Operation *op,
   });
 }
 
-/// Assert that every allocation can be deallocated in the same block. I.e.,
-/// every value that is returned or yielded from a block is:
-/// * guaranteed to be aliasing a bbArg of that block or a parent block, or
-/// * guaranteed to be aliasing an OpResult of a op in a parent block.
-///
-/// In that case, buffer deallocation is simple: Every allocated buffer can be
-/// deallocated in the same block. Otherwise, the buffer deallocation pass must
-/// be run.
-///
-/// Note: The current implementation checks for equivalent values instead of
-/// aliasing values, which is stricter than needed. We can currently not check
-/// for aliasing values because the analysis is a maybe-alias analysis and we
-/// need a must-alias analysis here.
-///
-/// Example:
-/// ```
-/// %0 = "some_op" : tensor<?xf32>
-/// %1 = scf.if %c -> (tensor<?xf32>) {
-///   scf.yield %0 : tensor<?xf32>
-/// } else {
-///   %t = linalg.alloc_tensor : tensor<?xf32>
-///   scf.yield %t : tensor<?xf32>
-/// }
-/// ```
-///
-/// In the above example, the second scf.yield op is problematic because the
-/// yielded value %t is defined in the same block as the scf.yield op and
-/// and bufferizes to a new allocation.
-// TODO: Remove buffer deallocation from One-Shot Bufferize and fix the buffer
-// deallocation pass.
-static LogicalResult assertNoAllocsReturned(Operation *op,
-                                            const OneShotAnalysisState &state) {
-  LogicalResult status = success();
-  DominanceInfo domInfo(op);
-  op->walk([&](Operation *returnOp) {
-    if (!isa<RegionBranchTerminatorOpInterface>(returnOp) ||
-        !state.getOptions().isOpAllowed(returnOp))
-      return WalkResult::advance();
-
-    for (OpOperand &returnValOperand : returnOp->getOpOperands()) {
-      Value returnVal = returnValOperand.get();
-      // Skip non-tensor values.
-      if (!isa<TensorType>(returnVal.getType()))
-        continue;
-
-      bool foundEquivValue = false;
-      state.applyOnEquivalenceClass(returnVal, [&](Value equivVal) {
-        if (auto bbArg = dyn_cast<BlockArgument>(equivVal)) {
-          Operation *definingOp = bbArg.getOwner()->getParentOp();
-          if (definingOp->isProperAncestor(returnOp))
-            foundEquivValue = true;
-          return;
-        }
-
-        Operation *definingOp = equivVal.getDefiningOp();
-        if (definingOp->getBlock()->findAncestorOpInBlock(
-                *returnOp->getParentOp()))
-          // Skip ops that happen after `returnOp` and parent ops.
-          if (happensBefore(definingOp, returnOp, domInfo))
-            foundEquivValue = true;
-      });
-
-      // Note: Returning/yielding buffer allocations is allowed only if
-      // `allowReturnAllocs` is set.
-      if (!foundEquivValue)
-        status = returnOp->emitError()
-                 << "operand #" << returnValOperand.getOperandNumber()
-                 << " may return/yield a new buffer allocation";
-    }
-
-    return WalkResult::advance();
-  });
-
-  return status;
-}
-
 LogicalResult bufferization::analyzeOp(Operation *op,
                                        OneShotAnalysisState &state,
                                        BufferizationStatistics *statistics) {
@@ -1402,8 +1326,6 @@ LogicalResult bufferization::analyzeOp(Operation *op,
   }
 
   bool failedAnalysis = false;
-  if (!options.allowReturnAllocs)
-    failedAnalysis |= failed(assertNoAllocsReturned(op, state));
 
   // Gather some extra analysis data.
   state.gatherYieldedTensors(op);
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
index b12ea25396b2253..ad7da317f5db14d 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -124,33 +124,12 @@ LogicalResult
 mlir::bufferization::insertTensorCopies(Operation *op,
                                         const AnalysisState &state) {
   IRRewriter rewriter(op->getContext());
-  StringRef escapeAttrName = BufferizationDialect::kEscapeAttrName;
 
   WalkResult result = op->walk([&](Operation *op) {
     auto bufferizableOp = state.getOptions().dynCastBufferizableOp(op);
     if (!bufferizableOp)
       return WalkResult::skip();
 
-    // Find allocations without an `escape` attribute and add the attribute
-    // based on analysis results.
-    if (!op->hasAttr(escapeAttrName)) {
-      SmallVector<bool> escapeAttrValue;
-      bool foundTensorResult = false;
-      for (OpResult opResult : op->getOpResults()) {
-        if (!isa<TensorType>(opResult.getType()) ||
-            !bufferizableOp.bufferizesToAllocation(opResult)) {
-          escapeAttrValue.push_back(false);
-          continue;
-        }
-        foundTensorResult = true;
-        bool escape = !state.getOptions().createDeallocs ||
-                      state.isTensorYielded(opResult);
-        escapeAttrValue.push_back(escape);
-      }
-      if (foundTensorResult)
-        op->setAttr(escapeAttrName, rewriter.getBoolArrayAttr(escapeAttrValue));
-    }
-
     // Find inplacability conflicts and resolve them. (Typically with explicit
     // tensor copies in the form of AllocTensorOps.)
     rewriter.setInsertionPoint(op);
diff --git a/mlir/lib/Dialect/ControlFlow/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/ControlFlow/Transforms/BufferizableOpInterfaceImpl.cpp
index 3228872029a2745..72f4a1a4f4c668f 100644
--- a/mlir/lib/Dialect/ControlFlow/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/ControlFlow/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -39,12 +39,7 @@ struct BranchLikeOpInterface
 
   LogicalResult verifyAnalysis(Operation *op,
                                const AnalysisState &state) const {
-    const auto &options =
-        static_cast<const OneShotBufferizationOptions &>(state.getOptions());
-    if (options.allowReturnAllocs)
-      return success();
-    return op->emitOpError(
-        "op cannot be bufferized without allow-return-allocs");
+    return success();
   }
 
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index dc65ac509d280dc..de4270ab38004a1 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -389,7 +389,7 @@ transform::EliminateLinalgOpAnchoredEmptyTensorsOp::apply(
     transform::TransformRewriter &rewriter, TransformResults &transformResults,
     TransformState &state) {
   bufferization::OneShotBufferizationOptions options;
-  options.allowReturnAllocs = true;
+  options.allowReturnAllocsFromLoops = true;
 
   for (Operation *target : state.getPayloadOps(getTarget())) {
     bufferization::OneShotAnalysisState state(target, options);
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
index 11cfefed890c669..665171dca4e2868 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -134,14 +134,7 @@ struct ExecuteRegionOpInterface
     // TODO: scf.execute_region with multiple yields are not supported.
     if (!getUniqueYieldOp(executeRegionOp))
       return op->emitOpError("op without unique scf.yield is not supported");
-    const auto &options =
-        static_cast<const OneShotBufferizationOptions &>(state.getOptions());
-    // allow-return-allocs is required for ops with multiple blocks.
-    if (options.allowReturnAllocs ||
-        executeRegionOp.getRegion().getBlocks().size() == 1)
-      return success();
-    return op->emitOpError(
-        "op cannot be bufferized without allow-return-allocs");
+    return success();
   }
 
   AliasingOpOperandList
@@ -552,9 +545,8 @@ struct ForOpInterface
         yieldValues.push_back(value);
         continue;
       }
-      FailureOr<Value> alloc =
-          allocateTensorForShapedValue(rewriter, yieldOp.getLoc(), value,
-                                       /*escape=*/true, state.getOptions());
+      FailureOr<Value> alloc = allocateTensorForShapedValue(
+          rewriter, yieldOp.getLoc(), value, state.getOptions());
       if (failed(alloc))
         return failure();
       yieldValues.push_back(*alloc);
@@ -661,7 +653,7 @@ struct ForOpInterface
                                const AnalysisState &state) const {
     const auto &options =
         static_cast<const OneShotBufferizationOptions &>(state.getOptions());
-    if (options.allowReturnAllocs)
+    if (options.allowReturnAllocsFromLoops)
       return success();
 
     auto forOp = cast<scf::ForOp>(op);
@@ -799,9 +791,8 @@ struct WhileOpInterface
         beforeYieldValues.push_back(value);
         continue;
       }
-      FailureOr<Value> alloc =
-          allocateTensorForShapedValue(rewriter, conditionOp.getLoc(), value,
-                                       /*escape=*/true, state.getOptions());
+      FailureOr<Value> alloc = allocateTensorForShapedValue(
+          rewriter, conditionOp.getLoc(), value, state.getOptions());
       if (failed(alloc))
         return failure();
       beforeYieldValues.push_back(*alloc);
@@ -947,7 +938,7 @@ struct WhileOpInterface
     auto whileOp = cast<scf::WhileOp>(op);
     const auto &options =
         static_cast<const OneShotBufferizationOptions &>(state.getOptions());
-    if (options.allowReturnAllocs)
+    if (options.allowReturnAllocsFromLoops)
       return success();
 
     auto conditionOp = whileOp.getConditionOp();
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index e7ba2debe0353ed..871686a4ada0f70 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -962,7 +962,6 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       const Type iTp = rewriter.getIndexType();
       Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp);
       Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
-      Block *insertionBlock = rewriter.getInsertionBlock();
       // TODO: Dense buffers should be allocated/deallocated via the callback
       // in BufferizationOptions.
       Value dst = allocDenseTensor(rewriter, loc, dstTp, dimSizes);
@@ -982,11 +981,6 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       genDelIteratorCall(rewriter, loc, elemTp, iter);
       rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(
           op, dstTp.getRankedTensorType(), dst);
-      // Deallocate the buffer.
-      if (bufferization::allocationDoesNotEscape(op->getOpResult(0))) {
-        rewriter.setInsertionPoint(insertionBlock->getTerminator());
-        deallocDenseTensor(rewriter, loc, dst);
-      }
       return success();
     }
     assert(!srcTp.hasEncoding() && dstTp.hasEncoding());
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 38e6621d54b331d..5d7cbbf94cdfed5 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -1120,8 +1120,6 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
     sizesForTensor(rewriter, sizes, loc, srcTp, src);
 
     Value dst = allocDenseTensor(rewriter, loc, dstTp, sizes);
-    Block *insertionBlock = rewriter.getInsertionBlock();
-    bool noEscape = bufferization::allocationDoesNotEscape(op->getOpResult(0));
 
     rewriter.create<ForeachOp>(loc, src, std::nullopt,
                                [&](OpBuilder &builder, Location loc,
@@ -1132,12 +1130,6 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
                                });
 
     rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, dstTp, dst);
-
-    // Deallocate the buffer.
-    if (noEscape) {
-      rewriter.setInsertionPoint(insertionBlock->getTerminator());
-      deallocDenseTensor(rewriter, loc, dst);
-    }
     return success();
   }
 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
index 9af525d55bf34e9..9b5567814a75f32 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparsificationAndBufferizationPass.cpp
@@ -185,9 +185,6 @@ mlir::getBufferizationOptionsForSparsification(bool analysisOnly) {
   using namespace mlir::bufferization;
   OneShotBufferizationOptions options;
   options.bufferizeFunctionBoundaries = true;
-  // TODO(springerm): To spot memory leaks more easily, returning dense allocs
-  // should be disallowed.
-  options.allowReturnAllocs = true;
   options.setFunctionBoundaryTypeConversion(LayoutMapOption::IdentityLayoutMap);
   options.unknownTypeConverterFn = [](Value value, Attribute memorySpace,
                                       const BufferizationOptions &options) {
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index ecca4dd3394e0ae..b715cf054f1da5e 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -203,8 +203,7 @@ struct CollapseShapeOpInterface
       // TODO: Create alloc_tensor ops during TensorCopyInsertion.
       AnalysisState analysisState(options);
       FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
-          rewriter, op->getLoc(), collapseShapeOp.getSrc(),
-          analysisState.isTensorYielded(collapseShapeOp.getResult()), options);
+          rewriter, op->getLoc(), collapseShapeOp.getSrc(), options);
       if (failed(tensorAlloc))
         return failure();
       auto memrefType =
@@ -465,9 +464,6 @@ struct FromElementsOpInterface
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationOptions &options) const {
     auto fromElementsOp = cast<tensor::FromElementsOp>(op);
-    // Should the buffer be deallocated?
-    bool dealloc = shouldDeallocateOpResult(
-        cast<OpResult>(fromElementsOp.getResult()), options);
 
     // TODO: Implement memory space for this op.
     if (options.defaultMemorySpace != Attribute())
@@ -478,10 +474,9 @@ struct FromElementsOpInterface
     auto tensorType = cast<RankedTensorType>(fromElementsOp.getType());
     auto shape = tensorType.getShape();
     // TODO: Create alloc_tensor ops during TensorCopyInsertion.
-    FailureOr<Value> tensorAlloc =
-        allocateTensorForShapedValue(rewriter, loc, fromElementsOp.getResult(),
-                                     /*escape=*/!dealloc, options,
-                                     /*copy=*/false);
+    FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
+        rewriter, loc, fromElementsOp.getResult(), options,
+        /*copy=*/false);
     if (failed(tensorAlloc))
       return failure();
     auto memrefType =
@@ -583,9 +578,6 @@ struct GenerateOpInterface
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationOptions &options) const {
     auto generateOp = cast<tensor::GenerateOp>(op);
-    // Should the buffer be deallocated?
-    bool dealloc = shouldDeallocateOpResult(
-        cast<OpResult>(generateOp.getResult()), options);
 
     // TODO: Implement memory space for this op.
     if (options.defaultMemorySpace != Attribute())
@@ -593,10 +585,9 @@ struct GenerateOpInterface
 
     // Allocate memory.
     Location loc = op->getLoc();
-    FailureOr<Value> tensorAlloc =
-        allocateTensorForShapedValue(rewriter, loc, generateOp.getResult(),
-                                     /*escape=*/!dealloc, options,
-                                     /*copy=*/false);
+    FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
+        rewriter, loc, generateOp.getResult(), options,
+        /*copy=*/false);
     if (failed(tensorAlloc))
       return failure();
 
@@ -783,13 +774,9 @@ struct PadOpInterface
       dynamicSizes.push_back(sum);
     }
 
-    // Should the buffer be deallocated?
-    bool dealloc =
-        shouldDeallocateOpResult(cast<OpResult>(padOp.getResult()), options);
     // Allocate a buffer for the padded result.
     FailureOr<Value> tensorAlloc =
-        allocateTensorForShapedValue(rewriter, loc, padOp.getResult(),
-                                     /*escape=*/!dealloc, options,
+        allocateTensorForShapedValue(rewriter, loc, padOp.getResult(), options,
                                      /*copy=*/false);
     if (failed(tensorAlloc))
       return failure();
@@ -991,20 +978,15 @@ struct SplatOpInterface
     OpBuilder::InsertionGuard g(rewriter);
     auto splatOp = cast<tensor::SplatOp>(op);
 
-    // Should the buffer be deallocated?
-    bool dealloc =
-        shouldDeallocateOpResult(cast<OpResult>(splatOp.getResult()), options);
-
     // TODO: Implement memory space for this op.
     if (options.defaultMemorySpace != Attribute())
       return op->emitError("memory space not implemented yet");
 
     // Allocate memory.
     Location loc = op->getLoc();
-    FailureOr<Value> tensorAlloc =
-        allocateTensorForShapedValue(rewriter, loc, splatOp.getResult(),
-                                     /*escape=*/!dealloc, options,
-                                     /*copy=*/false);
+    FailureOr<Value> tensorAlloc = allocateTensorForShapedValue(
+        rewriter, loc, splatOp.getResult(), options,
+        /*copy=*/false);
     if (failed(tensorAlloc))
       return failure();
 
diff --git a/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py
index ead337282bb7fd6..7e6c1b81cb350b7 100644
--- a/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py
+++ b/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py
@@ -62,10 +62,9 @@ def __init__(
         transformed_type: Type,
         target: Union[Operation, OpView, Value],
         *,
-        allow_return_allocs: Optional[bool] = None,
+        allow_return_allocs_from_loops: Optional[bool] = None,
         allow_unknown_ops: Optional[bool] = None,
         bufferize_function_boundaries: Optional[bool] = None,
-        create_deallocs: Optional[bool] = None,
         function_boundary_type_conversion: Optional[Enum] = None,
         memcpy_op: Optional[str] = None,
         print_conflicts: Optional[bool] = None,
@@ -80,10 +79,9 @@ def __init__(
         self,
         target: Union[Operation, OpView, Value],
         *,
-        allow_return_allocs: Optional[bool] = None,
+        allow_return_allocs_from_loops: Optional[bool] = None,
         allow_unknown_ops: Optional[bool] = None,
         bufferize_function_boundaries: Optional[bool] = None,
-        create_deallocs: Optional[bool] = None,
         function_boundary_type_conversion: Optional[Enum] = None,
         memcpy_op: Optional[str] = None,
         print_conflicts: Optional[bool] = None,
@@ -98,10 +96,9 @@ def __init__(
         transformed_type_or_target: Type,
         target_or_none: Optional[Union[Operation, OpView, Value]] = None,
         *,
-        allow_return_allocs: Optional[bool] = None,
+        allow_return_allocs_from_loops: Optional[bool] = None,
         allow_unknown_ops: Optional[bool] = None,
         bufferize_function_boundaries: Optional[bool] = None,
-        create_deallocs: Optional[bool] = None,
         function_boundary_type_conversion: Optional[Enum] = None,
         memcpy_op: Optional[str] = None,
         print_conflicts: Optional[bool] = None,
@@ -119,10 +116,9 @@ def __init__(
         super().__init__(
             transformed_type,
             target,
-            allow_return_allocs=allow_return_allocs,
+            allow_return_allocs_from_loops=allow_return_allocs_from_loops,
             allow_unknown_ops=allow_unknown_ops,
             bufferize_function_boundaries=bufferize_function_boundaries,
-            create_deallocs=create_deallocs,
             function_boundary_type_conversion=function_boundary_type_conversion,
             memcpy_op=memcpy_op,
             print_conflicts=print_conflicts,
diff --git a/mlir/test/Dialect/Arith/one-shot-bufferize.mlir b/mlir/test/Dialect/Arith/one-shot-bufferize.mlir
index bfca345e0642b1b..174bf2fc8e4bc75 100644
--- a/mlir/test/Dialect/Arith/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Arith/one-shot-bufferize.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries" -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // CHECK-LABEL: func @write_to_select_op_source
 //  CHECK-SAME:     %[[t1:.*]]: memref<?xf32, strided{{.*}}>, %[[t2:.*]]: memref<?xf32, strided{{.*}}>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
index bccff8ef8d65aaa..e4375950d336377 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-allow-return-allocs.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
 // CHECK-LABEL: func @buffer_not_deallocated(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir
index d74455fdf914ddf..47ede793e9eaba1 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @buffer_forwarding_conflict
 func.func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {bufferization.writable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
index 9ebd60f50328e14..b693c563859cc6d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-compat.mlir
@@ -1,23 +1,13 @@
 // RUN: mlir-opt %s \
-// RUN:     -one-shot-bufferize="allow-unknown-ops create-deallocs=0" \
+// RUN:     -one-shot-bufferize="allow-unknown-ops" \
 // RUN:     -split-input-file | \
-// RUN: FileCheck %s --check-prefix=CHECK-NODEALLOC
+// RUN: FileCheck %s
 
-// RUN: mlir-opt %s \
-// RUN:     -one-shot-bufferize="allow-unknown-ops create-deallocs=0" \
-// RUN:     -buffer-deallocation-pipeline | \
-// RUN: FileCheck %s --check-prefix=CHECK-BUFFERDEALLOC
-
-// CHECK-NODEALLOC-LABEL: func @out_of_place_bufferization
-// CHECK-BUFFERDEALLOC-LABEL: func @out_of_place_bufferization
+// CHECK-LABEL: func @out_of_place_bufferization
 func.func @out_of_place_bufferization(%t1 : tensor<?xf32>) -> (f32, f32) {
-  //     CHECK-NODEALLOC: memref.alloc
-  //     CHECK-NODEALLOC: memref.copy
-  // CHECK-NODEALLOC-NOT: memref.dealloc
-
-  //     CHECK-BUFFERDEALLOC: %[[alloc:.*]] = memref.alloc
-  //     CHECK-BUFFERDEALLOC: memref.copy
-  //     CHECK-BUFFERDEALLOC: memref.dealloc %[[alloc]]
+  //     CHECK: memref.alloc
+  //     CHECK: memref.copy
+  // CHECK-NOT: memref.dealloc
 
   %cst = arith.constant 0.0 : f32
   %idx = arith.constant 5 : index
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
index 3d15599915f0cfc..ee64d2c9b8de4f7 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs" -cse -canonicalize -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -eliminate-empty-tensors -empty-tensor-to-alloc-tensor -one-shot-bufferize="bufferize-function-boundaries" -cse -canonicalize -split-input-file | FileCheck %s
 
 //      CHECK: func @buffer_forwarding_conflict(
 // CHECK-SAME:   %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
index 6039afec7757e11..2c5f2083f589083 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir
@@ -1,15 +1,15 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs allow-unknown-ops" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops" -split-input-file | FileCheck %s
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs allow-unknown-ops unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=tensor,bufferization allow-unknown-ops allow-return-allocs" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=scf,bufferization allow-unknown-ops allow-return-allocs" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=tensor,bufferization allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="dialect-filter=scf,bufferization allow-unknown-ops" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF
 
 // CHECK-LABEL: func @use_of_unknown_op_1(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?xf32>
@@ -155,8 +155,6 @@ func.func @unknown_op_may_read(%v: vector<5xf32>)
   // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[filled_tensor]])
   %2 = "test.dummy_op"(%filled) : (tensor<10xf32>) -> (tensor<10xf32>)
 
-  // CHECK-DAG: memref.dealloc %[[alloc]]
-  // CHECK-DAG: memref.dealloc %[[m1]]
   // CHECK: return %[[alloc_tensor]], %[[dummy]]
   return %1, %2 : tensor<10xf32>, tensor<10xf32>
 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-pass-statistics.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-pass-statistics.mlir
index 0f8d886dadde029..1d0c73156a19f7d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-pass-statistics.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-pass-statistics.mlir
@@ -3,7 +3,6 @@
 
 // CHECK: OneShotBufferize
 // CHECK:  (S) 1 num-buffer-alloc
-// CHECK:  (S) 1 num-buffer-dealloc
 // CHECK:  (S) 1 num-tensor-in-place
 // CHECK:  (S) 2 num-tensor-out-of-place
 func.func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index)
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
index f92c7b4ee585150..3f468750cc28405 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -62,7 +62,6 @@ func.func @return_tensor(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf3
   // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]]
   %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32>
 
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[res_tensor]]
   return %0 : tensor<?xf32>
 }
@@ -115,7 +114,6 @@ func.func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index)
   // CHECK: %[[read2:.*]] = memref.load %[[alloc]]
   %read2 = tensor.extract %write[%idx] : tensor<10xf32>
 
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[read]], %[[read2]]
   return %read, %read2 : f32, f32
 }
@@ -127,7 +125,6 @@ func.func @copy_deallocated() -> tensor<10xf32> {
   // CHECK: %[[alloc:.*]] = memref.alloc()
   %0 = bufferization.alloc_tensor() : tensor<10xf32>
   // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]]
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[alloc_tensor]]
   return %0 : tensor<10xf32>
 }
@@ -162,7 +159,6 @@ func.func @alloc_tensor_with_copy(%t: tensor<5xf32>) -> tensor<5xf32> {
   // CHECK: memref.copy %[[m]], %[[alloc]]
   %0 = bufferization.alloc_tensor() copy(%t) : tensor<5xf32>
   // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]]
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[r]]
   return %0 : tensor<5xf32>
 }
@@ -174,7 +170,6 @@ func.func @alloc_tensor_with_memory_space() -> tensor<5xf32> {
   // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
   %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<5xf32>
   // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]]
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[r]]
   return %0 : tensor<5xf32>
 }
@@ -221,7 +216,6 @@ func.func @tensor_copy(%arg0: tensor<5xf32>) -> tensor<5xf32> {
   // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
   // CHECK: memref.copy %[[m]], %[[alloc]]
   // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]]
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[r]]
   %dest = bufferization.alloc_tensor() : tensor<5xf32>
   %0 = bufferization.materialize_in_destination %arg0 in %dest : tensor<5xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
index 9cacaf0c1fea69f..9319ac61d928eb8 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
@@ -1,13 +1,13 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -drop-equivalent-buffer-results -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=NO-DROP
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 " -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 " -split-input-file | FileCheck %s --check-prefix=NO-DROP
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file -o /dev/null
 
 // Make sure that the returned buffer is not deallocated.
 // TODO: Such buffers currently leak. We need buffer hoisting / ref counting for
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
index 5284f57a5beb532..a103e65affacd85 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
 // TODO: Extract op-specific test cases and move them to their respective
 // dialects.
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
index 45edca756e0dc7e..a25b57991baca7f 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
@@ -25,40 +25,6 @@ func.func @swappy(%cond1 : i1, %cond2 : i1, %t1 : tensor<f32>, %t2 : tensor<f32>
 
 // -----
 
-func.func @scf_if_not_equivalent(
-    %cond: i1, %t1: tensor<?xf32> {bufferization.writable = true},
-    %idx: index) -> tensor<?xf32> {
-  %r = scf.if %cond -> (tensor<?xf32>) {
-    scf.yield %t1 : tensor<?xf32>
-  } else {
-    // This buffer aliases, but it is not equivalent.
-    %t2 = tensor.extract_slice %t1 [%idx] [%idx] [1] : tensor<?xf32> to tensor<?xf32>
-    // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-    scf.yield %t2 : tensor<?xf32>
-  }
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %r : tensor<?xf32>
-}
-
-// -----
-
-func.func @scf_if_not_aliasing(
-    %cond: i1, %t1: tensor<?xf32> {bufferization.writable = true},
-    %idx: index) -> f32 {
-  %r = scf.if %cond -> (tensor<?xf32>) {
-    scf.yield %t1 : tensor<?xf32>
-  } else {
-    // This buffer aliases.
-    %t2 = bufferization.alloc_tensor(%idx) : tensor<?xf32>
-    // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-    scf.yield %t2 : tensor<?xf32>
-  }
-  %f = tensor.extract %r[%idx] : tensor<?xf32>
-  return %f : f32
-}
-
-// -----
-
 // expected-error @-3 {{expected callgraph to be free of circular dependencies}}
 
 func.func @foo() {
@@ -147,90 +113,6 @@ func.func @scf_while_non_equiv_yield(%arg0: tensor<5xi1>,
 
 // -----
 
-func.func private @fun_with_side_effects(%A: tensor<?xf32>)
-
-func.func @foo(%A: tensor<?xf32> {bufferization.writable = true}) -> (tensor<?xf32>) {
-  call @fun_with_side_effects(%A) : (tensor<?xf32>) -> ()
-  return %A: tensor<?xf32>
-}
-
-func.func @scf_yield_needs_copy(%A : tensor<?xf32> {bufferization.writable = true}, %iters : index) {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %res = scf.for %arg0 = %c0 to %iters step %c1 iter_args(%bbarg = %A) -> (tensor<?xf32>) {
-    %r = func.call @foo(%A) : (tensor<?xf32>) -> (tensor<?xf32>)
-    // expected-error @+1 {{Yield operand #0 is not equivalent to the corresponding iter bbArg}}
-    scf.yield %r : tensor<?xf32>
-  }
-  call @fun_with_side_effects(%res) : (tensor<?xf32>) -> ()
-  return
-}
-
-// -----
-
-func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = true})
-  ->  tensor<4xf32>
-{
-  // This bufferizes to a pattern that the cross-function boundary pass needs to
-  // convert into a new memref argument at all call site; this may be either:
-  //   - an externally created aliasing subview (if we want to allow aliasing
-  //     function arguments).
-  //   - a new alloc + copy (more expensive but does not create new function
-  //     argument aliasing).
-  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>
-
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %r0: tensor<4xf32>
-}
-
-// -----
-
-func.func @scf_yield(%b : i1, %A : tensor<4xf32>, %B : tensor<4xf32>) -> tensor<4xf32>
-{
-  %r = scf.if %b -> (tensor<4xf32>) {
-    scf.yield %A : tensor<4xf32>
-  } else {
-    scf.yield %B : tensor<4xf32>
-  }
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %r: tensor<4xf32>
-}
-
-// -----
-
-func.func @unknown_op(%A : tensor<4xf32>) -> tensor<4xf32>
-{
-  // expected-error: @+1 {{op was not bufferized}}
-  %r = "marklar"(%A) : (tensor<4xf32>) -> (tensor<4xf32>)
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %r: tensor<4xf32>
-}
-
-// -----
-
-func.func @mini_test_case1() -> tensor<10x20xf32> {
-  %f0 = arith.constant 0.0 : f32
-  %t = bufferization.alloc_tensor() : tensor<10x20xf32>
-  %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32>
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %r : tensor<10x20xf32>
-}
-
-// -----
-
-func.func @main() -> tensor<4xi32> {
-  %r = scf.execute_region -> tensor<4xi32> {
-    %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
-    // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-    scf.yield %A: tensor<4xi32>
-  }
-
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %r: tensor<4xi32>
-}
-
-// -----
-
 func.func @to_tensor_op_unsupported(%m: memref<?xf32>, %idx: index) -> (f32) {
   // expected-error @+1 {{to_tensor ops without `restrict` are not supported by One-Shot Analysis}}
   %0 = bufferization.to_tensor %m : memref<?xf32>
@@ -252,35 +134,6 @@ func.func @call_to_unknown_tensor_returning_func(%t : tensor<?xf32>) {
 
 // -----
 
-func.func @foo(%t : tensor<5xf32>) -> (tensor<5xf32>) {
-  %0 = bufferization.alloc_tensor() : tensor<5xf32>
-  // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-  return %0 : tensor<5xf32>
-}
-
-// Note: This function is not analyzed because there was an error in the
-// previous one.
-func.func @call_to_func_returning_non_equiv_tensor(%t : tensor<5xf32>) {
-  call @foo(%t) : (tensor<5xf32>) -> (tensor<5xf32>)
-  return
-}
-
-// -----
-
-func.func @yield_alloc_dominance_test_1(%cst : f32, %idx : index,
-                                        %idx2 : index) -> f32 {
-  %0 = scf.execute_region -> tensor<?xf32> {
-    %1 = bufferization.alloc_tensor(%idx) : tensor<?xf32>
-    // expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
-    scf.yield %1 : tensor<?xf32>
-  }
-  %2 = tensor.insert %cst into %0[%idx] : tensor<?xf32>
-  %r = tensor.extract %2[%idx2] : tensor<?xf32>
-  return %r : f32
-}
-
-// -----
-
 func.func @yield_alloc_dominance_test_2(%cst : f32, %idx : index,
                                         %idx2 : index) -> f32 {
   %1 = bufferization.alloc_tensor(%idx) : tensor<?xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
index 4e4340c9db8acea..de75b288855f94e 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-out-params.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=fully-dynamic-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" -drop-equivalent-buffer-results -buffer-results-to-out-params -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries function-boundary-type-conversion=infer-layout-map" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-BASELINE
 
 // Note: function-boundary-type-conversion=infer-layout-map with
 // promote-buffer-results-to-out-params is an unsupported combination.
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
index 249ee3448b8904e..b9de4ba34e0e6d3 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -1,16 +1,16 @@
 // Note: Default is function-boundary-type-conversion=infer-layout-map
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1" -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
 
 // Test bufferization using memref types that have fully dynamic layout maps.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 allow-return-allocs function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-FULLY-DYNAMIC-LAYOUT-MAP
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries=1 function-boundary-type-conversion=fully-dynamic-layout-map" -split-input-file | FileCheck %s --check-prefix=CHECK-FULLY-DYNAMIC-LAYOUT-MAP
 
 
 // Bufferization of bodiless function with no tensor return value.
@@ -101,7 +101,6 @@ func.func private @private_func(tensor<?xf32>) -> (f32)
 //   CHECK-DAG: memref.copy %[[t]], %[[alloc]]
 //   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
 //       CHECK: call @private_func(%[[casted]])
-//       CHECK: memref.dealloc %[[alloc]]
 func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
   %0 = call @private_func(%t) : (tensor<?xf32>) -> (f32)
   return %0 : f32
@@ -229,7 +228,6 @@ func.func @f2(%t: tensor<?xf32>) -> (f32) {
 //   CHECK-DAG: memref.copy %[[t3]], %[[alloc]]
 //   CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
 //       CHECK: call @f2(%[[casted]])
-//       CHECK: memref.dealloc %[[alloc]]
 func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
   %0 = call @f2(%t) : (tensor<?xf32>) -> (f32)
   return %0 : f32
@@ -256,7 +254,6 @@ func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
 //   CHECK-NOT:   copy
 //       CHECK:   call @does_not_read(%[[casted]])
 //       CHECK:   %[[r:.*]] = memref.load %[[casted]]
-//       CHECK:   memref.dealloc %[[alloc]]
 func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> f32 {
   %0 = call @does_not_read(%t) : (tensor<?xf32>) -> (tensor<?xf32>)
   %idx = arith.constant 4 : index
@@ -283,7 +280,6 @@ func.func @main() {
 //      CHECK:   call @some_external_func(%[[B]]) : (memref<4xi32, strided<[?], offset: ?>>) -> ()
   call @some_external_func(%A) : (tensor<4xi32>) -> ()
 
-//      CHECK: memref.dealloc %[[alloc]]
   return
 }
 
@@ -312,7 +308,6 @@ func.func @main() {
     scf.yield
   }
 
-//      CHECK:   memref.dealloc %[[alloc]]
   return
 }
 
@@ -463,9 +458,6 @@ func.func @main() {
   // CHECK-NEXT:   call @printMemrefF32(%[[dC]]) : (memref<*xf32>) -> ()
   call @printMemrefF32(%res2) : (tensor<*xf32>) -> ()
 
-  // CHECK-DAG:   memref.dealloc %[[A]] : memref<64xf32>
-  // CHECK-DAG:   memref.dealloc %[[B]] : memref<64xf32>
-  // CHECK-DAG:   memref.dealloc %[[C]] : memref<f32>
   // CHECK-NEXT:   return
   return
 }
@@ -580,7 +572,6 @@ func.func @equivalent_func_arg_2(%t0: tensor<?xf32> {bufferization.writable = tr
     // CHECK-DAG: %[[casted:.*]] = memref.cast %[[alloc]]
     // CHECK-DAG: memref.copy %[[arg0]], %[[alloc]]
     // CHECK: call @inner_func_2(%[[casted]])
-    // CHECK: memref.dealloc %[[alloc]]
     // CHECK-NOT: scf.yield
     %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
     scf.yield %t1 : tensor<?xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir
index 7679350d47fa4f6..78bd7adec3671d7 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion-memory-space.mlir
@@ -6,7 +6,7 @@ func.func @unknown_op_copy() -> (tensor<10xf32>, tensor<10xf32>) {
   %cst = arith.constant 0.0 : f32
   // CHECK: %[[dummy:.*]] = "test.dummy_op"() : () -> tensor<10xf32>
   %t = "test.dummy_op"() : () -> tensor<10xf32>
-  // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[dummy]]) {bufferization.escape = [false]} : tensor<10xf32>
+  // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[dummy]]) : tensor<10xf32>
   %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
   return %s, %t : tensor<10xf32>, tensor<10xf32>
 }
@@ -17,9 +17,9 @@ func.func @unknown_op_copy() -> (tensor<10xf32>, tensor<10xf32>) {
 func.func @alloc_tensor_copy() -> (tensor<10xf32>, tensor<10xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
+  // CHECK: bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
   %t = bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 1 : ui64} : tensor<10xf32>
+  // CHECK: bufferization.alloc_tensor() {memory_space = 1 : ui64} : tensor<10xf32>
   %s = tensor.insert %cst into %t[%c0] : tensor<10xf32>
   return %s, %t : tensor<10xf32>, tensor<10xf32>
 }
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
index 4dea1cbcfaa4f44..72cf08df5978cf5 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -1,17 +1,14 @@
 // RUN: mlir-opt %s -test-tensor-copy-insertion -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
-// RUN: mlir-opt %s -test-tensor-copy-insertion="create-deallocs=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-DEALLOC
+// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
 
 // CHECK-LABEL: func @read_after_write_conflict(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
 // CHECK-FUNC-LABEL: func @read_after_write_conflict(
-// CHECK-NO-DEALLOC-LABEL: func @read_after_write_conflict(
 func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
-  // CHECK: %[[copy:.*]] = bufferization.alloc_tensor() copy(%[[t]]) {bufferization.escape = [false]} : tensor<?xf32>
-  // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor<?xf32>
-  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor<?xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) : tensor<?xf32>
+  // CHECK: %[[copy:.*]] =  bufferization.alloc_tensor() copy(%{{.*}}) : tensor<?xf32>
   // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[copy]]
   %0 = tensor.insert %f into %t[%idx] : tensor<?xf32>
   // CHECK: return %[[insert]], %[[t]]
@@ -22,11 +19,9 @@ func.func @read_after_write_conflict(%t: tensor<?xf32>, %idx: index, %f: f32)
 
 // CHECK-LABEL: func @return_alloc_tensor
 // CHECK-FUNC-LABEL: func @return_alloc_tensor
-// CHECK-NO-DEALLOC-LABEL: func @return_alloc_tensor
 func.func @return_alloc_tensor() -> (tensor<5xf32>) {
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
-  // CHECK-FUNC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
-  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() : tensor<5xf32>
+  // CHECK: bufferization.alloc_tensor() : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   return %0 : tensor<5xf32>
 }
@@ -34,16 +29,12 @@ func.func @return_alloc_tensor() -> (tensor<5xf32>) {
 // -----
 
 // CHECK-LABEL: func @do_not_copy_undefined_tensor
-// CHECK-NO-DEALLOC-LABEL: func @do_not_copy_undefined_tensor
 func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
   -> (tensor<5xf32>, tensor<5xf32>)
 {
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<5xf32>
   // The second alloc_tensor should not have a copy operand.
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<5xf32>
-
-  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true]} : tensor<5xf32>
-  // CHECK-NO-DEALLOC: bufferization.alloc_tensor() {bufferization.escape = [true], memory_space = 0 : i64} : tensor<5xf32>
+  // CHECK: bufferization.alloc_tensor() : tensor<5xf32>
+  // CHECK: bufferization.alloc_tensor() {memory_space = 0 : i64} : tensor<5xf32>
   %0 = bufferization.alloc_tensor() : tensor<5xf32>
   %1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
   return %0, %1 : tensor<5xf32>, tensor<5xf32>
@@ -55,7 +46,7 @@ func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
 func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32)
   -> (tensor<5xf32>, tensor<5xf32>)
 {
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<5xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {memory_space = 0 : i64} : tensor<5xf32>
   // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
   %r = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>],
@@ -74,7 +65,7 @@ func.func @do_not_copy_when_result_not_read(%t: tensor<5xf32>, %f: f32)
   -> (tensor<3xf32>)
 {
   %0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<3xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {memory_space = 0 : i64} : tensor<3xf32>
   // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
   %r = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>],
diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
index 7c8bb422f533d7c..9b6d8ccfd6587d2 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
@@ -21,7 +21,6 @@ func.func @test_function(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf3
   // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]]
   %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32>
 
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[res_tensor]]
   return %0 : tensor<?xf32>
 }
@@ -50,7 +49,6 @@ func.func @test_function(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf3
   // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]]
   %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32>
 
-  // CHECK: memref.dealloc %[[alloc]]
   // CHECK: return %[[res_tensor]]
   return %0 : tensor<?xf32>
 }
@@ -117,7 +115,6 @@ module {
     // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]]
     %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32>
 
-    // CHECK: memref.dealloc %[[alloc]]
     // CHECK: return %[[res_tensor]]
     return %0 : tensor<?xf32>
   }
diff --git a/mlir/test/Dialect/Bufferization/invalid.mlir b/mlir/test/Dialect/Bufferization/invalid.mlir
index ad3e657cd37e38e..3dfd1eb17e8d64f 100644
--- a/mlir/test/Dialect/Bufferization/invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/invalid.mlir
@@ -26,38 +26,6 @@ func.func @alloc_tensor_copy_and_dims(%t: tensor<?xf32>, %sz: index) {
 
 // -----
 
-func.func @alloc_tensor_invalid_escape_attr(%sz: index) {
-  // expected-error @+1{{'bufferization.escape' is expected to be a bool array attribute}}
-  %0 = bufferization.alloc_tensor(%sz) {bufferization.escape = 5} : tensor<?xf32>
-  return
-}
-
-// -----
-
-func.func @alloc_tensor_invalid_escape_attr_size(%sz: index) {
-  // expected-error @+1{{'bufferization.escape' has wrong number of elements, expected 1, got 2}}
-  %0 = bufferization.alloc_tensor(%sz) {bufferization.escape = [true, false]} : tensor<?xf32>
-  return
-}
-
-// -----
-
-func.func @escape_attr_non_allocating(%t0: tensor<?xf32>) {
-  // expected-error @+1{{'bufferization.escape' only valid for allocation results}}
-  %0 = tensor.extract_slice %t0[0][5][1] {bufferization.escape = [true]} : tensor<?xf32> to tensor<5xf32>
-  return
-}
-
-// -----
-
-func.func @escape_attr_non_bufferizable(%m0: memref<?xf32>) {
-  // expected-error @+1{{'bufferization.escape' only valid on bufferizable ops}}
-  %0 = memref.cast %m0 {bufferization.escape = [true]} : memref<?xf32> to memref<10xf32>
-  return
-}
-
-// -----
-
 #DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }>
 
 func.func @sparse_alloc_direct_return() -> tensor<20x40xf32, #DCSR> {
diff --git a/mlir/test/Dialect/ControlFlow/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/ControlFlow/one-shot-bufferize-analysis.mlir
index 84df4c8045a8862..bc8ac8d559aa484 100644
--- a/mlir/test/Dialect/ControlFlow/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/ControlFlow/one-shot-bufferize-analysis.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -one-shot-bufferize="allow-return-allocs test-analysis-only dump-alias-sets bufferize-function-boundaries" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -one-shot-bufferize="test-analysis-only dump-alias-sets bufferize-function-boundaries" -split-input-file %s | FileCheck %s
 
 // CHECK-LABEL: func @single_branch(
 //  CHECK-SAME:     {__bbarg_alias_set_attr__ = [{{\[}}[{{\[}}"%[[arg1:.*]]", "%[[t:.*]]"]], [{{\[}}"%[[arg1]]", "%[[t]]"]]]]}
diff --git a/mlir/test/Dialect/ControlFlow/one-shot-bufferize-invalid.mlir b/mlir/test/Dialect/ControlFlow/one-shot-bufferize-invalid.mlir
index 7ff837540711efc..479497ccb6f4455 100644
--- a/mlir/test/Dialect/ControlFlow/one-shot-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/ControlFlow/one-shot-bufferize-invalid.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file %s -verify-diagnostics
+// RUN: mlir-opt -one-shot-bufferize="bufferize-function-boundaries" -split-input-file %s -verify-diagnostics
 
 // expected-error @below{{failed to bufferize op}}
 // expected-error @below{{incoming operands of block argument have inconsistent memory spaces}}
diff --git a/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir b/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir
index 482cb379d57a972..b82ebdde63a1c3f 100644
--- a/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/ControlFlow/one-shot-bufferize.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -split-input-file %s | FileCheck %s
-// RUN: mlir-opt -one-shot-bufferize="allow-return-allocs" -split-input-file %s | FileCheck %s --check-prefix=CHECK-NO-FUNC
+// RUN: mlir-opt -one-shot-bufferize="bufferize-function-boundaries" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -one-shot-bufferize -split-input-file %s | FileCheck %s --check-prefix=CHECK-NO-FUNC
 
 // CHECK-NO-FUNC-LABEL: func @br(
 //  CHECK-NO-FUNC-SAME:     %[[t:.*]]: tensor<5xf32>)
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir
index 4905e2405c60ea3..2d79a80cddc2beb 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
 
 // CHECK-LABEL: @elementwise_no_conflict
 func.func @elementwise_no_conflict(%a: tensor<5xf32>,
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index f72b7d310c6d071..66d7fda2230ce8d 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -canonicalize -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries" -canonicalize -buffer-loop-hoisting -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
 
 // TODO: Some test cases from this file should be moved to other dialects.
 
@@ -77,7 +77,6 @@ func.func @not_inplace(
                      outs(%A: tensor<?x?xf32>)
     -> tensor<?x?xf32>
 
-  //     CHECK: memref.dealloc %[[ALLOC]]
   //     CHECK: return
   // CHECK-NOT: tensor
   return %r: tensor<?x?xf32>
@@ -212,7 +211,6 @@ func.func @matmul(
     scf.yield %2 : tensor<128x192xf32>
   }
 
-  // CHECK: memref.dealloc %[[ALLOC]]
   return %0 : tensor<128x192xf32>
 }
 
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-allow-return-allocs-no-deallocs.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-allow-return-allocs-no-deallocs.mlir
index 7e894b775fe06e3..6e402d539243455 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-allow-return-allocs-no-deallocs.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-allow-return-allocs-no-deallocs.mlir
@@ -1,7 +1,4 @@
-// RUN: mlir-opt %s \
-// RUN:     -one-shot-bufferize="allow-return-allocs create-deallocs=0" \
-// RUN:     -split-input-file | \
-// RUN: FileCheck %s --dump-input=always
+// RUN: mlir-opt %s -one-shot-bufferize=allow-return-allocs-from-loops -split-input-file | FileCheck %s --dump-input=always
 
 // A regression test to check that different before and after argument types are
 // bufferized successfully.
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
index 1b9143bde6821ab..7d23498f32e16dd 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only" -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
 // CHECK-LABEL: func @scf_for_yield_only
 func.func @scf_for_yield_only(
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir
index 0544656034b22ce..9d206b1895f6525 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-invalid.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops allow-return-allocs" -split-input-file -verify-diagnostics
+// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops allow-return-allocs-from-loops" -split-input-file -verify-diagnostics
 
 func.func @inconsistent_memory_space_scf_if(%c: i1) -> tensor<10xf32> {
   // Yielding tensors with different memory spaces. Such IR cannot be
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir
index e7001e5d4abe00f..8f4b924cfd3cc8e 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-tensor-copy-insertion.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-tensor-copy-insertion="allow-return-allocs" -allow-unregistered-dialect -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
+// RUN: mlir-opt %s -test-tensor-copy-insertion=allow-return-allocs-from-loops -allow-unregistered-dialect -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -test-tensor-copy-insertion="allow-return-allocs-from-loops bufferize-function-boundaries" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
 
 // CHECK-LABEL: func @scf_for(
 //  CHECK-SAME:     %[[A:.*]]: tensor<?xf32>, %[[B:.*]]: tensor<?xf32>
@@ -7,8 +7,8 @@ func.func @scf_for(%A : tensor<?xf32>, %B : tensor<?xf32>,
                    %lb : index, %ub : index, %step : index)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
-  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor<?xf32>
-  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor<?xf32>
+  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<?xf32>
+  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<?xf32>
   // CHECK:   %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]])
   %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
       -> (tensor<?xf32>, tensor<?xf32>)
@@ -28,15 +28,15 @@ func.func @scf_for_swapping_yields(%A : tensor<?xf32>, %B : tensor<?xf32>,
                                    %lb : index, %ub : index, %step : index)
   -> (tensor<?xf32>, tensor<?xf32>)
 {
-  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor<?xf32>
-  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor<?xf32>
+  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<?xf32>
+  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<?xf32>
   // CHECK:   %[[for:.*]]:2 = scf.for {{.*}} iter_args(%[[iter1:.*]] = %[[A_copy]], %[[iter2:.*]] = %[[B_copy]])
   %r0:2 = scf.for %i = %lb to %ub step %step iter_args(%tA = %A, %tB = %B)
       -> (tensor<?xf32>, tensor<?xf32>)
   {
     // Yield tensors in different order.
-    // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[iter2]]) {bufferization.escape = [true]} : tensor<?xf32>
-    // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor() copy(%[[iter1]]) {bufferization.escape = [true]} : tensor<?xf32>
+    // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[iter2]]) : tensor<?xf32>
+    // CHECK-DAG: %[[yield2:.*]] = bufferization.alloc_tensor() copy(%[[iter1]]) : tensor<?xf32>
     // CHECK: scf.yield %[[yield1]], %[[yield2]]
     scf.yield %tB, %tA : tensor<?xf32>, tensor<?xf32>
   }
@@ -51,8 +51,8 @@ func.func @scf_for_swapping_yields(%A : tensor<?xf32>, %B : tensor<?xf32>,
 func.func @scf_while(%A: tensor<5xi1>, %B: tensor<5xi1>, %idx: index)
   -> (tensor<5xi1>, tensor<5xi1>)
 {
-  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor<5xi1>
-  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor<5xi1>
+  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<5xi1>
+  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<5xi1>
   // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[A_copy]], %[[w1:.*]] = %[[B_copy]]) {{.*}} {
   %r0, %r1 = scf.while (%w0 = %A, %w1 = %B)
       : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) {
@@ -82,16 +82,16 @@ func.func @scf_while_non_equiv_condition_and_body(%A: tensor<5xi1>,
                                                   %idx: index)
   -> (tensor<5xi1>, tensor<5xi1>)
 {
-  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) {bufferization.escape = [false]} : tensor<5xi1>
-  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) {bufferization.escape = [false]} : tensor<5xi1>
+  // CHECK: %[[A_copy:.*]] = bufferization.alloc_tensor() copy(%[[A]]) : tensor<5xi1>
+  // CHECK: %[[B_copy:.*]] = bufferization.alloc_tensor() copy(%[[B]]) : tensor<5xi1>
   // CHECK: %[[loop:.*]]:2 = scf.while (%[[w0:.*]] = %[[A_copy]], %[[w1:.*]] = %[[B_copy]]) {{.*}} {
   %r0, %r1 = scf.while (%w0 = %A, %w1 = %B)
       : (tensor<5xi1>, tensor<5xi1>) -> (tensor<5xi1>, tensor<5xi1>) {
     // CHECK: %[[condition:.*]] = tensor.extract %[[w0]]
     %condition = tensor.extract %w0[%idx] : tensor<5xi1>
     // Yield tensors in different order.
-    // CHECK-DAG: %[[yield0:.*]] = bufferization.alloc_tensor() copy(%[[w1]]) {bufferization.escape = [true]} : tensor<5xi1>
-    // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[w0]]) {bufferization.escape = [true]} : tensor<5xi1>
+    // CHECK-DAG: %[[yield0:.*]] = bufferization.alloc_tensor() copy(%[[w1]]) : tensor<5xi1>
+    // CHECK-DAG: %[[yield1:.*]] = bufferization.alloc_tensor() copy(%[[w0]]) : tensor<5xi1>
     // CHECK: scf.condition(%[[condition]]) %[[yield0]], %[[yield1]]
     scf.condition(%condition) %w1, %w0 : tensor<5xi1>, tensor<5xi1>
   } do {
@@ -117,7 +117,7 @@ func.func @scf_forall_out_of_place(%in: tensor<100xf32>,
   %num_threads = arith.constant 100 : index
 
   // CHECK-FUNC-NOT: alloc_tensor
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[arg1]]) {bufferization.escape = [false]} : tensor<100xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[arg1]]) : tensor<100xf32>
   // CHECK: scf.forall {{.*}} shared_outs(%[[o:.*]] = %[[alloc]])
   %result = scf.forall (%thread_idx) in (%num_threads) shared_outs(%o = %out) -> tensor<100xf32> {
       // CHECK: tensor.extract_slice
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index a8c488461c74edd..9b5c0cf048c56f5 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops bufferize-function-boundaries" -cse -canonicalize -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // CHECK-LABEL: func @scf_for_yield_only(
 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
diff --git a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
index fc9695f8c3c9870..8990fbf39b567a7 100644
--- a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
+++ b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s -test-tensor-copy-insertion="allow-return-allocs" | FileCheck %s
-// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" | FileCheck %s --check-prefix=CHECK-FUNC
+// RUN: mlir-opt %s -test-tensor-copy-insertion | FileCheck %s
+// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries" | FileCheck %s --check-prefix=CHECK-FUNC
 
 #DCSR = #sparse_tensor.encoding<{
   map = (d0, d1) -> (d0 : compressed, d1 : compressed)
@@ -8,8 +8,8 @@
 // CHECK-LABEL: func @bufferization_alloc_tensor
 // CHECK-FUNC-LABEL: func @bufferization_alloc_tensor
 func.func @bufferization_alloc_tensor() -> tensor<20x40xf32, #DCSR> {
-  // CHECK: bufferization.alloc_tensor() {bufferization.escape = [false]}
-  // CHECK-FUNC: bufferization.alloc_tensor() {bufferization.escape = [true]}
+  // CHECK: bufferization.alloc_tensor()
+  // CHECK-FUNC: bufferization.alloc_tensor()
   %0 = bufferization.alloc_tensor() : tensor<20x40xf32, #DCSR>
   %1 = sparse_tensor.load %0 : tensor<20x40xf32, #DCSR>
   return %1 : tensor<20x40xf32, #DCSR>
@@ -19,8 +19,8 @@ func.func @bufferization_alloc_tensor() -> tensor<20x40xf32, #DCSR> {
 // CHECK-LABEL: func @sparse_tensor_new
 // CHECK-FUNC-LABEL: func @sparse_tensor_new
 func.func @sparse_tensor_new(%file: !Filename) -> tensor<20x40xf32, #DCSR> {
-  // CHECK: sparse_tensor.new {{.*}} {bufferization.escape = [false]}
-  // CHECK-FUNC: sparse_tensor.new {{.*}} {bufferization.escape = [true]}
+  // CHECK: sparse_tensor.new {{.*}}
+  // CHECK-FUNC: sparse_tensor.new {{.*}}
   %0 = sparse_tensor.new %file : !Filename to tensor<20x40xf32, #DCSR>
   return %0 : tensor<20x40xf32, #DCSR>
 }
@@ -28,14 +28,14 @@ func.func @sparse_tensor_new(%file: !Filename) -> tensor<20x40xf32, #DCSR> {
 // CHECK-LABEL: func @sparse_tensor_convert
 // CHECK-FUNC-LABEL: func @sparse_tensor_convert
 func.func @sparse_tensor_convert() -> tensor<20x40xf32> {
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]}
-  // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]}
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor()
+  // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor()
   %0 = bufferization.alloc_tensor() : tensor<20x40xf32, #DCSR>
   // CHECK: %[[loaded:.*]] = sparse_tensor.load %[[alloc]]
   // CHECK-FUNC: %[[loaded:.*]] = sparse_tensor.load %[[alloc]]
   %1 = sparse_tensor.load %0 : tensor<20x40xf32, #DCSR>
-  // CHECK: sparse_tensor.convert %[[loaded]] {bufferization.escape = [false]}
-  // CHECK-FUNC: sparse_tensor.convert %[[loaded]] {bufferization.escape = [true]}
+  // CHECK: sparse_tensor.convert %[[loaded]]
+  // CHECK-FUNC: sparse_tensor.convert %[[loaded]]
   %2 = sparse_tensor.convert %1 : tensor<20x40xf32, #DCSR> to tensor<20x40xf32>
   return %2 : tensor<20x40xf32>
 }
@@ -57,9 +57,9 @@ func.func @sparse_tensor_convert() -> tensor<20x40xf32> {
 func.func @update_notinplace(%argb: tensor<10xf32>, %arga: tensor<10xf32, #SV>)
   -> (tensor<10xf32>, tensor<10xf32>)
 {
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [false]} : tensor<10xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) : tensor<10xf32>
   // CHECK: linalg.generic {{.*}} outs(%[[alloc]]
-  // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [true]} : tensor<10xf32>
+  // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) : tensor<10xf32>
   // CHECK-FUNC: linalg.generic {{.*}} outs(%[[alloc]]
   %0 = linalg.generic #trait
   ins(%arga: tensor<10xf32, #SV>)
diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
index 610ff30a48c4a4f..903e083e43681b2 100755
--- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir
@@ -22,7 +22,7 @@
 
 // CHECK-LABEL: func.func @fold_yield_arg_zero() -> tensor<1024x1024xf64> {
 // CHECK:         %[[VAL_0:.*]] = arith.constant dense<0.000000e+00> : tensor<1024x1024xf64>
-// CHECK:         %[[VAL_1:.*]] = bufferization.alloc_tensor() copy(%[[VAL_0]]) {bufferization.escape = [false]} : tensor<1024x1024xf64>
+// CHECK:         %[[VAL_1:.*]] = bufferization.alloc_tensor() copy(%[[VAL_0]]) : tensor<1024x1024xf64>
 // CHECK:         return %[[VAL_1]] : tensor<1024x1024xf64>
 // CHECK:       }
 func.func @fold_yield_arg_zero() -> tensor<1024x1024xf64> {
@@ -41,7 +41,7 @@ func.func @fold_yield_arg_zero() -> tensor<1024x1024xf64> {
 
 // CHECK-LABEL: func.func @fold_yield_direct_zero() -> tensor<32xf64> {
 // CHECK:         %[[VAL_0:.*]] = arith.constant dense<0.000000e+00> : tensor<32xf64>
-// CHECK:         %[[VAL_1:.*]] = bufferization.alloc_tensor() copy(%[[VAL_0]]) {bufferization.escape = [false]} : tensor<32xf64>
+// CHECK:         %[[VAL_1:.*]] = bufferization.alloc_tensor() copy(%[[VAL_0]]) : tensor<32xf64>
 // CHECK:         return %[[VAL_1]] : tensor<32xf64>
 // CHECK:       }
 func.func @fold_yield_direct_zero() -> tensor<32xf64> {
@@ -64,8 +64,8 @@ func.func @fold_yield_direct_zero() -> tensor<32xf64> {
 // CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
 // CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
 // CHECK-DAG:       %[[VAL_6:.*]] = arith.constant dense<0.000000e+00> : tensor<8x8xf64>
-// CHECK-DAG:       %[[VAL_7:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) {bufferization.escape = [false]} : tensor<8x8xf64>
-// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) {bufferization.escape = [false], memory_space = 0 : i64} : tensor<8x8xf64>
+// CHECK-DAG:       %[[VAL_7:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) : tensor<8x8xf64>
+// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) {memory_space = 0 : i64} : tensor<8x8xf64>
 // CHECK-DAG:       %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
 // CHECK-DAG:       %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64>
 // CHECK-DAG:       %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref<?xindex>
@@ -132,8 +132,8 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>,
 // CHECK-DAG:       %[[VAL_6:.*]] = arith.constant false
 // CHECK-DAG:       %[[VAL_7:.*]] = arith.constant true
 // CHECK-DAG:       %[[VAL_8:.*]] = arith.constant dense<0.000000e+00> : tensor<8x8xf64>
-// CHECK-DAG:       %[[VAL_9:.*]] = bufferization.alloc_tensor() copy(%[[VAL_8]]) {bufferization.escape = [false]} : tensor<8x8xf64>
-// CHECK-DAG:       %[[VAL_10:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false]} : tensor<8x8xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>
+// CHECK-DAG:       %[[VAL_9:.*]] = bufferization.alloc_tensor() copy(%[[VAL_8]]) : tensor<8x8xf64>
+// CHECK-DAG:       %[[VAL_10:.*]] = bufferization.alloc_tensor() : tensor<8x8xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>>
 // CHECK-DAG:       %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
 // CHECK-DAG:       %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64>
 // CHECK-DAG:       %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{ lvlTypes = [ "compressed", "compressed" ] }>> to memref<?xindex>
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize-tensor-copy-insertion.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize-tensor-copy-insertion.mlir
index 84fe150400d7071..69af46eddff0986 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize-tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize-tensor-copy-insertion.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -test-tensor-copy-insertion -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries allow-return-allocs" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
+// RUN: mlir-opt %s -test-tensor-copy-insertion="bufferize-function-boundaries" -split-input-file | FileCheck %s --check-prefix=CHECK-FUNC
 
 // CHECK-LABEL: func @extract_slice(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?xf32>
@@ -9,8 +9,8 @@ func.func @extract_slice(%t: tensor<?xf32>, %idx: index, %f: f32)
 {
   // CHECK: %[[extract_slice:.*]] = tensor.extract_slice %[[t]][10] [5] [1]
   %0 = tensor.extract_slice %t[10][5][1] : tensor<?xf32> to tensor<5xf32>
-  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[extract_slice]]) {bufferization.escape = [false]} : tensor<5xf32>
-  // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) {bufferization.escape = [true]} : tensor<5xf32>
+  // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[extract_slice]]) : tensor<5xf32>
+  // CHECK-FUNC: bufferization.alloc_tensor() copy(%{{.*}}) : tensor<5xf32>
   // CHECK: %[[insert:.*]] = tensor.insert %{{.*}} into %[[alloc]]
   %1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
   // CHECK: return %[[insert]], %[[t]]
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
index 2aeb5a820812ea1..04877b1b21e1aab 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -1,12 +1,12 @@
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries" -drop-equivalent-buffer-results -split-input-file | FileCheck %s
 
 // Run fuzzer with different seeds.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=23 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=59 bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-fuzzer-seed=91 bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // Test bufferization using memref types that have no layout map.
-// RUN: mlir-opt %s -one-shot-bufferize="allow-return-allocs unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
+// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
 
 // CHECK-LABEL: func @insert_slice_fun
 //  CHECK-SAME:   %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
@@ -213,41 +213,6 @@ func.func @rank_reducing_parallel_insert_slice(%in: tensor<100xf32>, %out: tenso
 
 // -----
 
-// CHECK-LABEL: func @dealloc_generate_buffer
-func.func @dealloc_generate_buffer(%arg: tensor<*xf32>, %sz: index, %idx: index)
-  -> index
-{
-  // CHECK: memref.alloc
-  // CHECK: linalg.map
-  // CHECK: memref.dealloc
-  %0 = tensor.generate %sz {
-  ^bb0(%i : index):
-    %elem = tensor.dim %arg, %i : tensor<*xf32>
-    tensor.yield %elem : index
-  } : tensor<?xindex>
-  %r = tensor.extract %0[%idx] : tensor<?xindex>
-  return %r : index
-}
-
-// -----
-
-// CHECK-LABEL: func @dealloc_pad_buffer
-func.func @dealloc_pad_buffer(%t1: tensor<?x10xindex>, %l2: index, %h1: index,
-                              %h2: index, %idx: index) -> index {
-  // CHECK: memref.alloc
-  // CHECK: linalg.map
-  // CHECK: memref.dealloc
-  %0 = tensor.pad %t1 low[5, %l2] high[%h1, %h2] {
-  ^bb0(%arg0: index, %arg1: index):
-    %m = arith.muli %arg0, %arg1 : index
-    tensor.yield %m : index
-  } : tensor<?x10xindex> to tensor<?x?xindex>
-  %r = tensor.extract %0[%idx, %idx] : tensor<?x?xindex>
-  return %r : index
-}
-
-// -----
-
 // This test case could bufferize in-place with a better analysis. However, it
 // is simpler to let the canonicalizer fold away the tensor.insert_slice.
 
@@ -285,8 +250,6 @@ func.func @pad_memory_space(%t: tensor<?xf32>, %h1: index, %f: f32, %pos: index)
   } : tensor<?xf32> to tensor<15xf32>
   // CHECK: memref.load {{.*}} : memref<15xf32, 3>
   %2 = tensor.extract %1[%pos] : tensor<15xf32>
-  // CHECK-DAG: memref.dealloc %[[alloc_tensor]]
-  // CHECK-DAG: memref.dealloc %[[padded_alloc]]
   return %2 : f32
 }
 
@@ -391,7 +354,6 @@ func.func @parallel_insert_slice_source_out_of_place(%in: tensor<1xf32>, %out: t
       vector.print %r : f32
 
       // CHECK: memref.copy
-      // CHECK: memref.dealloc
       scf.forall.in_parallel {
         tensor.parallel_insert_slice %insert into %o[%thread_idx][1][1] :
           tensor<1xf32> into tensor<100xf32>
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
index 0385d6a3c92f769..06165515d4613c6 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(canonicalize,cse),one-shot-bufferize{bufferize-function-boundaries})" |\
-// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops))" |\
+// RUN: mlir-opt -pass-pipeline="builtin.module(buffer-deallocation-pipeline,convert-bufferization-to-memref,func.func(convert-vector-to-scf,lower-affine,convert-linalg-to-loops))" |\
 // RUN: mlir-opt -pass-pipeline="builtin.module(func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,expand-strided-metadata,lower-affine,convert-arith-to-llvm,finalize-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \
 
 // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
diff --git a/mlir/test/lib/Dialect/Bufferization/TestTensorCopyInsertion.cpp b/mlir/test/lib/Dialect/Bufferization/TestTensorCopyInsertion.cpp
index a1f6078d9c94e21..fedfbe350a51a9f 100644
--- a/mlir/test/lib/Dialect/Bufferization/TestTensorCopyInsertion.cpp
+++ b/mlir/test/lib/Dialect/Bufferization/TestTensorCopyInsertion.cpp
@@ -42,26 +42,21 @@ struct TestTensorCopyInsertionPass
 
   void runOnOperation() override {
     bufferization::OneShotBufferizationOptions options;
-    options.allowReturnAllocs = allowReturnAllocs;
+    options.allowReturnAllocsFromLoops = allowReturnAllocsFromLoops;
     options.bufferizeFunctionBoundaries = bufferizeFunctionBoundaries;
-    options.createDeallocs = createDeallocs;
     if (mustInferMemorySpace)
       options.defaultMemorySpace = std::nullopt;
     if (failed(bufferization::insertTensorCopies(getOperation(), options)))
       signalPassFailure();
   }
 
-  Option<bool> allowReturnAllocs{
-      *this, "allow-return-allocs",
-      llvm::cl::desc("Allows returning/yielding new allocations from a block."),
+  Option<bool> allowReturnAllocsFromLoops{
+      *this, "allow-return-allocs-from-loops",
+      llvm::cl::desc("Allows returning/yielding new allocations from a loop."),
       llvm::cl::init(false)};
   Option<bool> bufferizeFunctionBoundaries{
       *this, "bufferize-function-boundaries",
       llvm::cl::desc("Bufferize function boundaries."), llvm::cl::init(false)};
-  Option<bool> createDeallocs{
-      *this, "create-deallocs",
-      llvm::cl::desc("Specify if new allocations should be deallocated."),
-      llvm::cl::init(true)};
   Option<bool> mustInferMemorySpace{
       *this, "must-infer-memory-space",
       llvm::cl::desc(
diff --git a/mlir/test/python/dialects/transform_bufferization_ext.py b/mlir/test/python/dialects/transform_bufferization_ext.py
index 733bd3a2cab6feb..03b16c324a40758 100644
--- a/mlir/test/python/dialects/transform_bufferization_ext.py
+++ b/mlir/test/python/dialects/transform_bufferization_ext.py
@@ -86,10 +86,9 @@ def testOneShotBufferizeOpAttributes():
     with InsertionPoint(sequence.body):
         bufferization.OneShotBufferizeOp(
             sequence.bodyTarget,
-            allow_return_allocs=True,
+            allow_return_allocs_from_loops=True,
             allow_unknown_ops=True,
             bufferize_function_boundaries=True,
-            create_deallocs=False,
             function_boundary_type_conversion=LayoutMapOption.IdentityLayoutMap,
             memcpy_op="linalg.copy",
             print_conflicts=True,
@@ -99,10 +98,9 @@ def testOneShotBufferizeOpAttributes():
     # CHECK-LABEL: TEST: testOneShotBufferizeOpAttributes
     # CHECK: = transform.bufferization.one_shot_bufferize
     # CHECK-SAME: layout{IdentityLayoutMap}
-    # CHECK-SAME: allow_return_allocs = true
+    # CHECK-SAME: allow_return_allocs_from_loops = true
     # CHECK-SAME: allow_unknown_ops = true
     # CHECK-SAME: bufferize_function_boundaries = true
-    # CHECK-SAME: create_deallocs = false
     # CHECK-SAME: memcpy_op = "linalg.copy"
     # CHECK-SAME: print_conflicts = true
     # CHECK-SAME: test_analysis_only = true


