[Mlir-commits] [mlir] 883912a - Revert "[mlir] Introduce CloneOp and adapt test cases in BufferDeallocation."
Alexander Belyaev
llvmlistbot at llvm.org
Mon Mar 29 03:49:35 PDT 2021
Author: Alexander Belyaev
Date: 2021-03-29T12:47:59+02:00
New Revision: 883912abe669ef246ada0adc9cf1c9748b742400
URL: https://github.com/llvm/llvm-project/commit/883912abe669ef246ada0adc9cf1c9748b742400
DIFF: https://github.com/llvm/llvm-project/commit/883912abe669ef246ada0adc9cf1c9748b742400.diff
LOG: Revert "[mlir] Introduce CloneOp and adapt test cases in BufferDeallocation."
This reverts commit 06b03800f3fcbf49f5ddd4145b40f04e4ba4eb42.
Until some kind of support for region args is added.
Added:
mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
mlir/lib/Transforms/CopyRemoval.cpp
mlir/test/Transforms/copy-removal.mlir
Modified:
mlir/docs/BufferDeallocationInternals.md
mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
mlir/include/mlir/Transforms/BufferUtils.h
mlir/include/mlir/Transforms/Passes.h
mlir/include/mlir/Transforms/Passes.td
mlir/lib/Dialect/MemRef/CMakeLists.txt
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
mlir/lib/Transforms/BufferDeallocation.cpp
mlir/lib/Transforms/BufferUtils.cpp
mlir/lib/Transforms/CMakeLists.txt
mlir/test/Transforms/buffer-deallocation.mlir
mlir/test/Transforms/canonicalize.mlir
Removed:
mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
################################################################################
diff --git a/mlir/docs/BufferDeallocationInternals.md b/mlir/docs/BufferDeallocationInternals.md
index 7c731066d31e..dee37493512d 100644
--- a/mlir/docs/BufferDeallocationInternals.md
+++ b/mlir/docs/BufferDeallocationInternals.md
@@ -48,7 +48,7 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>) {
partial_write(%0, %0)
br ^bb3()
^bb3():
- test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+ "linalg.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -133,11 +133,11 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
^bb1:
br ^bb3(%arg1 : memref<2xf32>)
^bb2:
- %0 = memref.alloc() : memref<2xf32> // aliases: %1
+ %0 = alloc() : memref<2xf32> // aliases: %1
use(%0)
br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>): // %1 could be %0 or %arg1
- test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -149,7 +149,7 @@ of code:
```mlir
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
- %0 = memref.alloc() : memref<2xf32> // moved to bb0
+ %0 = alloc() : memref<2xf32> // moved to bb0
cond_br %arg0, ^bb1, ^bb2
^bb1:
br ^bb3(%arg1 : memref<2xf32>)
@@ -157,7 +157,7 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
use(%0)
br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
- test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -179,17 +179,17 @@ func @condBranchDynamicType(
^bb1:
br ^bb3(%arg1 : memref<?xf32>)
^bb2(%0: index):
- %1 = memref.alloc(%0) : memref<?xf32> // cannot be moved upwards to the data
+ %1 = alloc(%0) : memref<?xf32> // cannot be moved upwards due to the data
// dependency to %0
use(%1)
br ^bb3(%1 : memref<?xf32>)
^bb3(%2: memref<?xf32>):
- test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ "linalg.copy"(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
return
}
```
-## Introduction of Clones
+## Introduction of Copies
In order to guarantee that all allocated buffers are freed properly, we have to
pay attention to the control flow and all potential aliases a buffer allocation
@@ -200,10 +200,10 @@ allocations have already been placed:
```mlir
func @branch(%arg0: i1) {
- %0 = memref.alloc() : memref<2xf32> // aliases: %2
+ %0 = alloc() : memref<2xf32> // aliases: %2
cond_br %arg0, ^bb1, ^bb2
^bb1:
- %1 = memref.alloc() : memref<2xf32> // resides here for demonstration purposes
+ %1 = alloc() : memref<2xf32> // resides here for demonstration purposes
// aliases: %2
br ^bb3(%1 : memref<2xf32>)
^bb2:
@@ -232,31 +232,88 @@ result:
```mlir
func @branch(%arg0: i1) {
- %0 = memref.alloc() : memref<2xf32>
+ %0 = alloc() : memref<2xf32>
cond_br %arg0, ^bb1, ^bb2
^bb1:
- %1 = memref.alloc() : memref<2xf32>
- %3 = memref.clone %1 : (memref<2xf32>) -> (memref<2xf32>)
- memref.dealloc %1 : memref<2xf32> // %1 can be safely freed here
+ %1 = alloc() : memref<2xf32>
+ %3 = alloc() : memref<2xf32> // temp copy for %1
+ "linalg.copy"(%1, %3) : (memref<2xf32>, memref<2xf32>) -> ()
+ dealloc %1 : memref<2xf32> // %1 can be safely freed here
br ^bb3(%3 : memref<2xf32>)
^bb2:
use(%0)
- %4 = memref.clone %0 : (memref<2xf32>) -> (memref<2xf32>)
+ %4 = alloc() : memref<2xf32> // temp copy for %0
+ "linalg.copy"(%0, %4) : (memref<2xf32>, memref<2xf32>) -> ()
br ^bb3(%4 : memref<2xf32>)
^bb3(%2: memref<2xf32>):
…
- memref.dealloc %2 : memref<2xf32> // free temp buffer %2
- memref.dealloc %0 : memref<2xf32> // %0 can be safely freed here
+ dealloc %2 : memref<2xf32> // free temp buffer %2
+ dealloc %0 : memref<2xf32> // %0 can be safely freed here
return
}
```
Note that a temporary buffer for %2 was introduced to free all allocations
properly. Note further that the unnecessary allocation of %3 can be easily
-removed using one of the post-pass transformations or the canonicalization
-pass.
+removed using one of the post-pass transformations.
+
+Reconsider the previously introduced sample demonstrating dynamically shaped
+types:
+
+```mlir
+func @condBranchDynamicType(
+ %arg0: i1,
+ %arg1: memref<?xf32>,
+ %arg2: memref<?xf32>,
+ %arg3: index) {
+ cond_br %arg0, ^bb1, ^bb2(%arg3: index)
+^bb1:
+ br ^bb3(%arg1 : memref<?xf32>)
+^bb2(%0: index):
+ %1 = alloc(%0) : memref<?xf32> // aliases: %2
+ use(%1)
+ br ^bb3(%1 : memref<?xf32>)
+^bb3(%2: memref<?xf32>):
+ "linalg.copy"(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ return
+}
+```
-The presented example also works with dynamically shaped types.
+In the presence of dynamically shaped types (DSTs), we have to parameterize
+the allocations with additional dimension information from the source buffers
+we want to copy from. BufferDeallocation automatically introduces all required
+operations to extract the dimension specifications and wires them into the
+associated allocations:
+
+```mlir
+func @condBranchDynamicType(
+ %arg0: i1,
+ %arg1: memref<?xf32>,
+ %arg2: memref<?xf32>,
+ %arg3: index) {
+ cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
+^bb1:
+ %c0 = constant 0 : index
+ %0 = dim %arg1, %c0 : memref<?xf32> // dimension operation to parameterize
+ // the following temp allocation
+ %1 = alloc(%0) : memref<?xf32>
+ "linalg.copy"(%arg1, %1) : (memref<?xf32>, memref<?xf32>) -> ()
+ br ^bb3(%1 : memref<?xf32>)
+^bb2(%2: index):
+ %3 = alloc(%2) : memref<?xf32>
+ use(%3)
+ %c0_0 = constant 0 : index
+ %4 = dim %3, %c0_0 : memref<?xf32> // dimension operation to parameterize
+ // the following temp allocation
+ %5 = alloc(%4) : memref<?xf32>
+ "linalg.copy"(%3, %5) : (memref<?xf32>, memref<?xf32>) -> ()
+ dealloc %3 : memref<?xf32> // %3 can be safely freed here
+ br ^bb3(%5 : memref<?xf32>)
+^bb3(%6: memref<?xf32>):
+ "linalg.copy"(%6, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ dealloc %6 : memref<?xf32> // %6 can be safely freed here
+ return
+}
+```
BufferDeallocation performs a fix-point iteration taking all aliases of all
tracked allocations into account. We initialize the general iteration process
@@ -278,7 +335,7 @@ func @condBranchDynamicTypeNested(
^bb1:
br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
- %1 = memref.alloc(%0) : memref<?xf32> // cannot be moved upwards due to the data
+ %1 = alloc(%0) : memref<?xf32> // cannot be moved upwards due to the data
// dependency to %0
// aliases: %2, %3, %4
use(%1)
@@ -292,7 +349,7 @@ func @condBranchDynamicTypeNested(
^bb6(%3: memref<?xf32>): // crit. alias of %arg1 and %2 (in other words %1)
br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>): // non-crit. alias of %3, since %3 dominates %4
- test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ "linalg.copy"(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
return
}
```
@@ -309,11 +366,13 @@ func @condBranchDynamicTypeNested(
%arg3: index) {
cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
^bb1:
- // temp buffer required due to alias %3
- %5 = memref.clone %arg1 : (memref<?xf32>) -> (memref<?xf32>)
+ %c0 = constant 0 : index
+ %d0 = dim %arg1, %c0 : memref<?xf32>
+ %5 = alloc(%d0) : memref<?xf32> // temp buffer required due to alias %3
+ "linalg.copy"(%arg1, %5) : (memref<?xf32>, memref<?xf32>) -> ()
br ^bb6(%5 : memref<?xf32>)
^bb2(%0: index):
- %1 = memref.alloc(%0) : memref<?xf32>
+ %1 = alloc(%0) : memref<?xf32>
use(%1)
cond_br %arg0, ^bb3, ^bb4
^bb3:
@@ -321,14 +380,17 @@ func @condBranchDynamicTypeNested(
^bb4:
br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
- %6 = memref.clone %1 : (memref<?xf32>) -> (memref<?xf32>)
- memref.dealloc %1 : memref<?xf32>
+ %c0_0 = constant 0 : index
+ %d1 = dim %2, %c0_0 : memref<?xf32>
+ %6 = alloc(%d1) : memref<?xf32> // temp buffer required due to alias %3
+ "linalg.copy"(%1, %6) : (memref<?xf32>, memref<?xf32>) -> ()
+ dealloc %1 : memref<?xf32>
br ^bb6(%6 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
- test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
- memref.dealloc %3 : memref<?xf32> // free %3, since %4 is a non-crit. alias of %3
+ "linalg.copy"(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ dealloc %3 : memref<?xf32> // free %3, since %4 is a non-crit. alias of %3
return
}
```
@@ -337,7 +399,7 @@ Since %3 is a critical alias, BufferDeallocation introduces an additional
temporary copy in all predecessor blocks. %3 has an additional (non-critical)
alias %4 that extends the live range until the end of bb7. Therefore, we can
free %3 after its last use, while taking all aliases into account. Note that %4
-does not need to be freed, since we did not introduce a copy for it.
+does not need to be freed, since we did not introduce a copy for it.
The actual introduction of buffer copies is done after the fix-point iteration
has been terminated and all critical aliases have been detected. A critical
@@ -383,7 +445,7 @@ infer the high-level control flow:
func @inner_region_control_flow(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
- %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ %0 = alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
then(%arg2 : memref<?x?xf32>) { // aliases: %arg4, %1
custom.region_if_yield %arg2 : memref<?x?xf32>
@@ -406,11 +468,11 @@ operation to determine the value of %2 at runtime which creates an alias:
```mlir
func @nested_region_control_flow(%arg0 : index, %arg1 : index) -> memref<?x?xf32> {
%0 = cmpi "eq", %arg0, %arg1 : index
- %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32> // %2 will be an alias of %1
} else {
- %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32> // nested allocation in a div.
+ %3 = alloc(%arg0, %arg1) : memref<?x?xf32> // nested allocation in a div.
// branch
use(%3)
scf.yield %1 : memref<?x?xf32> // %2 will be an alias of %1
@@ -427,13 +489,13 @@ alias of %1 which does not need to be tracked.
```mlir
func @nested_region_control_flow(%arg0: index, %arg1: index) -> memref<?x?xf32> {
%0 = cmpi "eq", %arg0, %arg1 : index
- %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32>
} else {
- %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+ %3 = alloc(%arg0, %arg1) : memref<?x?xf32>
use(%3)
- memref.dealloc %3 : memref<?x?xf32> // %3 can be safely freed here
+ dealloc %3 : memref<?x?xf32> // %3 can be safely freed here
scf.yield %1 : memref<?x?xf32>
}
return %2 : memref<?x?xf32>
@@ -452,12 +514,12 @@ above that uses a nested allocation:
func @inner_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
- %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ %0 = alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
then(%arg2 : memref<?x?xf32>) { // aliases: %arg4, %1
custom.region_if_yield %arg2 : memref<?x?xf32>
} else(%arg3 : memref<?x?xf32>) {
- %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32> // aliases: %arg4, %1
+ %2 = alloc(%arg0, %arg1) : memref<?x?xf32> // aliases: %arg4, %1
custom.region_if_yield %2 : memref<?x?xf32>
} join(%arg4 : memref<?x?xf32>) { // aliases: %1
custom.region_if_yield %arg4 : memref<?x?xf32>
@@ -475,22 +537,40 @@ This causes BufferDeallocation to introduce additional copies:
func @inner_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
- %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ %0 = alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
then(%arg2 : memref<?x?xf32>) {
- %4 = memref.clone %arg2 : (memref<?x?xf32>) -> (memref<?x?xf32>)
+ %c0 = constant 0 : index // determine dimension extents for temp allocation
+ %2 = dim %arg2, %c0 : memref<?x?xf32>
+ %c1 = constant 1 : index
+ %3 = dim %arg2, %c1 : memref<?x?xf32>
+ %4 = alloc(%2, %3) : memref<?x?xf32> // temp buffer required due to critic.
+ // alias %arg4
+ linalg.copy(%arg2, %4) : memref<?x?xf32>, memref<?x?xf32>
custom.region_if_yield %4 : memref<?x?xf32>
} else(%arg3 : memref<?x?xf32>) {
- %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
- %5 = memref.clone %2 : (memref<?x?xf32>) -> (memref<?x?xf32>)
- memref.dealloc %2 : memref<?x?xf32>
+ %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
+ %c0 = constant 0 : index // determine dimension extents for temp allocation
+ %3 = dim %2, %c0 : memref<?x?xf32>
+ %c1 = constant 1 : index
+ %4 = dim %2, %c1 : memref<?x?xf32>
+ %5 = alloc(%3, %4) : memref<?x?xf32> // temp buffer required due to critic.
+ // alias %arg4
+ linalg.copy(%2, %5) : memref<?x?xf32>, memref<?x?xf32>
+ dealloc %2 : memref<?x?xf32>
custom.region_if_yield %5 : memref<?x?xf32>
} join(%arg4: memref<?x?xf32>) {
- %4 = memref.clone %arg4 : (memref<?x?xf32>) -> (memref<?x?xf32>)
- memref.dealloc %arg4 : memref<?x?xf32>
+ %c0 = constant 0 : index // determine dimension extents for temp allocation
+ %2 = dim %arg4, %c0 : memref<?x?xf32>
+ %c1 = constant 1 : index
+ %3 = dim %arg4, %c1 : memref<?x?xf32>
+ %4 = alloc(%2, %3) : memref<?x?xf32> // this allocation will be removed by
+ // applying the copy removal pass
+ linalg.copy(%arg4, %4) : memref<?x?xf32>, memref<?x?xf32>
+ dealloc %arg4 : memref<?x?xf32>
custom.region_if_yield %4 : memref<?x?xf32>
}
- memref.dealloc %0 : memref<?x?xf32> // %0 can be safely freed here
+ dealloc %0 : memref<?x?xf32> // %0 can be safely freed here
return %1 : memref<?x?xf32>
}
```
@@ -520,7 +600,7 @@ func @loop_nested_if(
iter_args(%iterBuf = %buf) -> memref<2xf32> {
%1 = cmpi "eq", %i, %ub : index
%2 = scf.if %1 -> (memref<2xf32>) {
- %3 = memref.alloc() : memref<2xf32> // makes %2 a critical alias due to a
+ %3 = alloc() : memref<2xf32> // makes %2 a critical alias due to a
// divergent allocation
use(%3)
scf.yield %3 : memref<2xf32>
@@ -529,7 +609,7 @@ func @loop_nested_if(
}
scf.yield %2 : memref<2xf32>
}
- test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+ "linalg.copy"(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -554,27 +634,31 @@ func @loop_nested_if(
%step: index,
%buf: memref<2xf32>,
%res: memref<2xf32>) {
- %4 = memref.clone %buf : (memref<2xf32>) -> (memref<2xf32>)
+ %4 = alloc() : memref<2xf32>
+ "linalg.copy"(%buf, %4) : (memref<2xf32>, memref<2xf32>) -> ()
%0 = scf.for %i = %lb to %ub step %step
iter_args(%iterBuf = %4) -> memref<2xf32> {
%1 = cmpi "eq", %i, %ub : index
%2 = scf.if %1 -> (memref<2xf32>) {
- %3 = memref.alloc() : memref<2xf32> // makes %2 a critical alias
+ %3 = alloc() : memref<2xf32> // makes %2 a critical alias
use(%3)
- %5 = memref.clone %3 : (memref<2xf32>) -> (memref<2xf32>)
- memref.dealloc %3 : memref<2xf32>
+ %5 = alloc() : memref<2xf32> // temp copy due to crit. alias %2
+ "linalg.copy"(%3, %5) : memref<2xf32>, memref<2xf32>
+ dealloc %3 : memref<2xf32>
scf.yield %5 : memref<2xf32>
} else {
- %6 = memref.clone %iterBuf : (memref<2xf32>) -> (memref<2xf32>)
+ %6 = alloc() : memref<2xf32> // temp copy due to crit. alias %2
+ "linalg.copy"(%iterBuf, %6) : memref<2xf32>, memref<2xf32>
scf.yield %6 : memref<2xf32>
}
- %7 = memref.clone %2 : (memref<2xf32>) -> (memref<2xf32>)
- memref.dealloc %2 : memref<2xf32>
- memref.dealloc %iterBuf : memref<2xf32> // free backedge iteration variable
+ %7 = alloc() : memref<2xf32> // temp copy due to crit. alias %iterBuf
+ "linalg.copy"(%2, %7) : memref<2xf32>, memref<2xf32>
+ dealloc %2 : memref<2xf32>
+ dealloc %iterBuf : memref<2xf32> // free backedge iteration variable
scf.yield %7 : memref<2xf32>
}
- test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
- memref.dealloc %0 : memref<2xf32> // free temp copy %0
+ "linalg.copy"(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+ dealloc %0 : memref<2xf32> // free temp copy %0
return
}
```
@@ -600,37 +684,46 @@ deallocations.
In order to limit the complexity of the BufferDeallocation transformation, some
tiny code-polishing/optimization transformations are not applied on-the-fly
-during placement. Currently, a canonicalization pattern is added to the clone
-operation to reduce the appearance of unnecessary clones.
+during placement. Currently, there is only the CopyRemoval transformation to
+remove unnecessary copy and allocation operations.
Note: further transformations might be added to the post-pass phase in the
future.
-## Clone Canonicalization
+## CopyRemoval Pass
+
+A common pattern that arises during placement is the introduction of
+unnecessary temporary copies that are used instead of the original source
+buffer. For this reason, there is a post-pass transformation that removes these
+allocations and copies via `-copy-removal`. This pass, besides removing
+unnecessary copy operations, will also remove the dead allocations and their
+corresponding deallocation operations. The CopyRemoval pass can currently be
+applied to operations that implement the `CopyOpInterface` in either of these
+two situations:
-During placement of clones it may happen, that unnecessary clones are inserted.
-If these clones appear with their corresponding dealloc operation within the
-same block, we can use the canonicalizer to remove these unnecessary operations.
-Note, that this step needs to take place after the insertion of clones and
-deallocs in the buffer deallocation step. The canonicalization inludes both,
-the newly created target value from the clone operation and the source
-operation.
+* reusing the source buffer of the copy operation.
+* reusing the target buffer of the copy operation.
-## Canonicalization of the Source Buffer of the Clone Operation
+## Reusing the Source Buffer of the Copy Operation
-In this case, the source of the clone operation can be used instead of its
-target. The unused allocation and deallocation operations that are defined for
-this clone operation are also removed. Here is a working example generated by
-the BufferDeallocation pass that allocates a buffer with dynamic size. A deeper
+In this case, the source of the copy operation can be used instead of the target.
+The unused allocation and deallocation operations that are defined for this
+copy operation are also removed. Here is a working example generated by the
+BufferDeallocation pass that allocates a buffer with dynamic size. A deeper
analysis of this sample reveals that the highlighted operations are redundant
and can be removed.
```mlir
func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
- %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
- %2 = memref.clone %1 : (memref<?x?xf32>) -> (memref<?x?xf32>)
- memref.dealloc %1 : memref<?x?xf32>
- return %2 : memref<?x?xf32>
+ %7 = alloc(%arg0, %arg1) : memref<?x?xf32>
+ %c0_0 = constant 0 : index
+ %8 = dim %7, %c0_0 : memref<?x?xf32>
+ %c1_1 = constant 1 : index
+ %9 = dim %7, %c1_1 : memref<?x?xf32>
+ %10 = alloc(%8, %9) : memref<?x?xf32>
+ linalg.copy(%7, %10) : memref<?x?xf32>, memref<?x?xf32>
+ dealloc %7 : memref<?x?xf32>
+ return %10 : memref<?x?xf32>
}
```
@@ -638,39 +731,53 @@ Will be transformed to:
```mlir
func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
- %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
- return %1 : memref<?x?xf32>
+ %7 = alloc(%arg0, %arg1) : memref<?x?xf32>
+ %c0_0 = constant 0 : index
+ %8 = dim %7, %c0_0 : memref<?x?xf32>
+ %c1_1 = constant 1 : index
+ %9 = dim %7, %c1_1 : memref<?x?xf32>
+ return %7 : memref<?x?xf32>
}
```
-In this case, the additional copy %2 can be replaced with its original source
-buffer %1. This also applies to the associated dealloc operation of %1.
+In this case, the additional copy %10 can be replaced with its original source
+buffer %7. This also applies to the associated dealloc operation of %7.
-## Canonicalization of the Target Buffer of the Clone Operation
+To limit the complexity of this transformation, it only removes copy operations
+when the following constraints are met:
-In this case, the target buffer of the clone operation can be used instead of
-its source. The unused deallocation operation that is defined for this clone
-operation is also removed.
+* The copy operation, the defining operation for the target value, and the
+deallocation of the source value lie in the same block.
+* There are no users/aliases of the target value between the defining operation
+of the target value and its copy operation.
+* There are no users/aliases of the source value between its associated copy
+operation and the deallocation of the source value.
-Consider the following example where a generic test operation writes the result
-to %temp and then copies %temp to %result. However, these two operations
-can be merged into a single step. Canonicalization removes the clone operation
-and %temp, and replaces the uses of %temp with %result:
+## Reusing the Target Buffer of the Copy Operation
+
+In this case, the target buffer of the copy operation can be used instead of
+its source. The unused allocation and deallocation operations that are defined
+for this copy operation are also removed.
+
+Consider the following example where a generic linalg operation writes the
+result to %temp and then copies %temp to %result. However, these two operations
+can be merged into a single step. Copy removal removes the copy operation and
+%temp, and replaces the uses of %temp with %result:
```mlir
func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
- %temp = memref.alloc() : memref<2xf32>
- test.generic {
+ %temp = alloc() : memref<2xf32>
+ linalg.generic {
args_in = 1 : i64,
args_out = 1 : i64,
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]} %arg0, %temp {
^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
%tmp2 = exp %gen2_arg0 : f32
- test.yield %tmp2 : f32
+ linalg.yield %tmp2 : f32
}: memref<2xf32>, memref<2xf32>
- %result = memref.clone %temp : (memref<2xf32>) -> (memref<2xf32>)
- memref.dealloc %temp : memref<2xf32>
+ "linalg.copy"(%temp, %result) : (memref<2xf32>, memref<2xf32>) -> ()
+ dealloc %temp : memref<2xf32>
return
}
```
@@ -679,24 +786,33 @@ Will be transformed to:
```mlir
func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
- test.generic {
+ linalg.generic {
args_in = 1 : i64,
args_out = 1 : i64,
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]} %arg0, %result {
^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
%tmp2 = exp %gen2_arg0 : f32
- test.yield %tmp2 : f32
+ linalg.yield %tmp2 : f32
}: memref<2xf32>, memref<2xf32>
return
}
```
+Like before, several constraints apply in order to use this transformation:
+
+* The copy operation, the defining operation of the source value, and the
+deallocation of the source value lie in the same block.
+* There are no users/aliases of the target value between the defining operation
+of the source value and the copy operation.
+* There are no users/aliases of the source value between the copy operation and
+the deallocation of the source value.
+
## Known Limitations
-BufferDeallocation introduces additional clones from “memref” dialect
-(“memref.clone”). Analogous, all deallocations use the “memref” dialect-free
-operation “memref.dealloc”. The actual copy process is realized using
-“test.copy”. Furthermore, buffers are essentially immutable after their
-creation in a block. Another limitations are known in the case using
-unstructered control flow.
+BufferDeallocation introduces additional copies using allocations from the
+“memref” dialect (“memref.alloc”). Analogously, all deallocations use the free
+operation of the “memref” dialect (“memref.dealloc”). The actual copy process
+is realized using “linalg.copy”. Furthermore, buffers are essentially
+immutable after their creation in a block. Other limitations are known when
+using unstructured control flow.
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
index 054242397783..9c2b912c0df1 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
@@ -12,7 +12,6 @@
#include "mlir/IR/Dialect.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/CastInterfaces.h"
-#include "mlir/Interfaces/CopyOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index fe0fd7d0ff36..b3f5257df782 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -12,7 +12,6 @@
include "mlir/Dialect/MemRef/IR/MemRefBase.td"
include "mlir/IR/OpBase.td"
include "mlir/Interfaces/CastInterfaces.td"
-include "mlir/Interfaces/CopyOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"
include "mlir/IR/SymbolInterfaces.td"
@@ -215,9 +214,6 @@ def MemRef_BufferCastOp : MemRef_Op<"buffer_cast",
// Result type is tensor<4x?xf32>
%12 = memref.buffer_cast %10 : memref<4x?xf32, #map0, 42>
```
-
- Note, that mutating the result of the buffer cast operation leads to
- undefined behavior.
}];
let arguments = (ins AnyTensor:$tensor);
@@ -316,46 +312,6 @@ def MemRef_CastOp : MemRef_Op<"cast", [
let hasFolder = 1;
}
-//===----------------------------------------------------------------------===//
-// CloneOp
-//===----------------------------------------------------------------------===//
-
-def CloneOp : MemRef_Op<"clone", [
- CopyOpInterface,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
- ]> {
- let builders = [
- OpBuilder<(ins "Value":$value), [{
- return build($_builder, $_state, value.getType(), value);
- }]>];
-
- let description = [{
- Clones the data in the input view into an implicitly defined output view.
-
- Usage:
-
- ```mlir
- %arg1 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
- ```
-
- Note, that mutating the source or result of the clone operation leads to
- undefined behavior.
- }];
-
- let arguments = (ins Arg<AnyMemRef, "", []>:$input);
- let results = (outs Arg<AnyMemRef, "", []>:$output);
-
- let extraClassDeclaration = [{
- Value getSource() { return input();}
- Value getTarget() { return output(); }
- }];
-
- let assemblyFormat = "$input attr-dict `:` type($input) `to` type($output)";
-
- let hasFolder = 1;
- let hasCanonicalizer = 1;
-}
-
//===----------------------------------------------------------------------===//
// DeallocOp
//===----------------------------------------------------------------------===//
@@ -1134,9 +1090,6 @@ def TensorLoadOp : MemRef_Op<"tensor_load",
// Produces a value of tensor<4x?xf32> type.
%12 = memref.tensor_load %10 : memref<4x?xf32, #layout, memspace0>
```
-
- If tensor load is used in the bufferization steps, mutating the source
- buffer after loading leads to undefined behavior.
}];
let arguments = (ins Arg<AnyRankedOrUnrankedMemRef,
diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
deleted file mode 100644
index 024fe5ebfbc3..000000000000
--- a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===- MemRefUtils.h - MemRef transformation utilities ----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This header file defines prototypes for various transformation utilities for
-// the MemRefOps dialect. These are not passes by themselves but are used
-// either by passes, optimization sequences, or in turn by other transformation
-// utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_MEMREF_UTILS_MEMREFUTILS_H
-#define MLIR_DIALECT_MEMREF_UTILS_MEMREFUTILS_H
-
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-
-namespace mlir {
-
-/// Finds the associated dealloc that can be linked to our allocation nodes (if
-/// any).
-Operation *findDealloc(Value allocValue);
-
-} // end namespace mlir
-
-#endif // MLIR_DIALECT_MEMREF_UTILS_MEMREFUTILS_H
diff --git a/mlir/include/mlir/Transforms/BufferUtils.h b/mlir/include/mlir/Transforms/BufferUtils.h
index e432fb8f53f5..33edffa372a3 100644
--- a/mlir/include/mlir/Transforms/BufferUtils.h
+++ b/mlir/include/mlir/Transforms/BufferUtils.h
@@ -39,6 +39,10 @@ class BufferPlacementAllocs {
static Operation *getStartOperation(Value allocValue, Block *placementBlock,
const Liveness &liveness);
+ /// Find an associated dealloc operation that is linked to the given
+ /// allocation node (if any).
+ static Operation *findDealloc(Value allocValue);
+
public:
/// Initializes the internal list by discovering all supported allocation
/// nodes.
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 1d4234b38efc..60ea4b188ae1 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -63,6 +63,9 @@ std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
/// Creates an instance of the Canonicalizer pass.
std::unique_ptr<Pass> createCanonicalizerPass();
+/// Create a pass that removes unnecessary Copy operations.
+std::unique_ptr<Pass> createCopyRemovalPass();
+
/// Creates a pass to perform common sub expression elimination.
std::unique_ptr<Pass> createCSEPass();
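As a side note, here is a minimal sketch of how the restored factory function can be combined with the deallocation pass in a pipeline. The pass-manager setup (`PassManager`, `addNestedPass<FuncOp>`) is an illustrative assumption based on the MLIR API of this period, not something this commit adds:

```c++
// Illustrative sketch only; assumes the MLIR pass-manager API of early 2021.
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

static mlir::LogicalResult runBufferPipeline(mlir::ModuleOp module,
                                             mlir::MLIRContext *context) {
  mlir::PassManager pm(context);
  // Place deallocs plus the temporary alloc/copy pairs this revert restores.
  pm.addNestedPass<mlir::FuncOp>(mlir::createBufferDeallocationPass());
  // Clean up the redundant copies afterwards, as described in the docs above.
  pm.addNestedPass<mlir::FuncOp>(mlir::createCopyRemovalPass());
  return pm.run(module);
}
```

On the command line, the same cleanup is exposed as the `-copy-removal` flag referenced in the documentation above.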
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 0e14dcb873e7..2305c4a39191 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -282,6 +282,8 @@ def BufferDeallocation : FunctionPass<"buffer-deallocation"> {
}];
let constructor = "mlir::createBufferDeallocationPass()";
+ // TODO: this pass likely shouldn't depend on Linalg?
+ let dependentDialects = ["linalg::LinalgDialect"];
}
def BufferHoisting : FunctionPass<"buffer-hoisting"> {
@@ -364,6 +366,11 @@ def Canonicalizer : Pass<"canonicalize"> {
let dependentDialects = ["memref::MemRefDialect"];
}
+def CopyRemoval : FunctionPass<"copy-removal"> {
+ let summary = "Remove the redundant copies from input IR";
+ let constructor = "mlir::createCopyRemovalPass()";
+}
+
def CSE : Pass<"cse"> {
let summary = "Eliminate common sub-expressions";
let description = [{
diff --git a/mlir/lib/Dialect/MemRef/CMakeLists.txt b/mlir/lib/Dialect/MemRef/CMakeLists.txt
index 737094373b0e..f33061b2d87c 100644
--- a/mlir/lib/Dialect/MemRef/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/CMakeLists.txt
@@ -1,22 +1 @@
-add_mlir_dialect_library(MLIRMemRef
- IR/MemRefDialect.cpp
- IR/MemRefOps.cpp
- Utils/MemRefUtils.cpp
-
- ADDITIONAL_HEADER_DIRS
- ${PROJECT_SOURCE_DIR}/inlude/mlir/Dialect/MemRefDialect
-
- DEPENDS
- MLIRStandardOpsIncGen
- MLIRMemRefOpsIncGen
-
- LINK_COMPONENTS
- Core
-
- LINK_LIBS PUBLIC
- MLIRDialect
- MLIRIR
- MLIRStandard
- MLIRTensor
- MLIRViewLikeInterface
-)
+add_subdirectory(IR)
diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
new file mode 100644
index 000000000000..aa9d57beb105
--- /dev/null
+++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
@@ -0,0 +1,21 @@
+add_mlir_dialect_library(MLIRMemRef
+ MemRefDialect.cpp
+ MemRefOps.cpp
+
+ ADDITIONAL_HEADER_DIRS
+  ${PROJECT_SOURCE_DIR}/include/mlir/Dialect/MemRefDialect
+
+ DEPENDS
+ MLIRStandardOpsIncGen
+ MLIRMemRefOpsIncGen
+
+ LINK_COMPONENTS
+ Core
+
+ LINK_LIBS PUBLIC
+ MLIRDialect
+ MLIRIR
+ MLIRStandard
+ MLIRTensor
+ MLIRViewLikeInterface
+)
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index fc179b29c239..546c43a97407 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -463,76 +462,6 @@ OpFoldResult CastOp::fold(ArrayRef<Attribute> operands) {
return succeeded(foldMemRefCast(*this)) ? getResult() : Value();
}
-//===----------------------------------------------------------------------===//
-// CloneOp
-//===----------------------------------------------------------------------===//
-
-static LogicalResult verify(CloneOp op) { return success(); }
-
-void CloneOp::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
- effects.emplace_back(MemoryEffects::Read::get(), input(),
- SideEffects::DefaultResource::get());
- effects.emplace_back(MemoryEffects::Write::get(), output(),
- SideEffects::DefaultResource::get());
-}
-
-namespace {
-/// Fold Dealloc operations that are deallocating an AllocOp that is only used
-/// by other Dealloc operations.
-struct SimplifyClones : public OpRewritePattern<CloneOp> {
- using OpRewritePattern<CloneOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(CloneOp cloneOp,
- PatternRewriter &rewriter) const override {
- if (cloneOp.use_empty()) {
- rewriter.eraseOp(cloneOp);
- return success();
- }
-
- Value source = cloneOp.input();
-
- // Removes the clone operation and the corresponding dealloc and alloc
- // operation (if any).
- auto tryRemoveClone = [&](Operation *sourceOp, Operation *dealloc,
- Operation *alloc) {
- if (!sourceOp || !dealloc || !alloc ||
- alloc->getBlock() != dealloc->getBlock())
- return false;
- rewriter.replaceOp(cloneOp, source);
- rewriter.eraseOp(dealloc);
- return true;
- };
-
- // Removes unnecessary clones that are derived from the result of the clone
- // op.
- Operation *deallocOp = findDealloc(cloneOp.output());
- Operation *sourceOp = source.getDefiningOp();
- if (tryRemoveClone(sourceOp, deallocOp, sourceOp))
- return success();
-
- // Removes unnecessary clones that are derived from the source of the clone
- // op.
- deallocOp = findDealloc(source);
- if (tryRemoveClone(sourceOp, deallocOp, cloneOp))
- return success();
-
- return failure();
- }
-};
-
-} // end anonymous namespace.
-
-void CloneOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
- MLIRContext *context) {
- results.insert<SimplifyClones>(context);
-}
-
-OpFoldResult CloneOp::fold(ArrayRef<Attribute> operands) {
- return succeeded(foldMemRefCast(*this)) ? getResult() : Value();
-}
-
//===----------------------------------------------------------------------===//
// DeallocOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
deleted file mode 100644
index 26a9a217134e..000000000000
--- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- Utils.cpp - Utilities to support the MemRef dialect ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements utilities for the MemRef dialect.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-
-using namespace mlir;
-
-/// Finds associated deallocs that can be linked to our allocation nodes (if
-/// any).
-Operation *mlir::findDealloc(Value allocValue) {
- auto userIt = llvm::find_if(allocValue.getUsers(), [&](Operation *user) {
- auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
- if (!effectInterface)
- return false;
- // Try to find a free effect that is applied to one of our values
- // that will be automatically freed by our pass.
- SmallVector<MemoryEffects::EffectInstance, 2> effects;
- effectInterface.getEffectsOnValue(allocValue, effects);
- return llvm::any_of(effects, [&](MemoryEffects::EffectInstance &it) {
- return isa<MemoryEffects::Free>(it.getEffect());
- });
- });
- // Assign the associated dealloc operation (if any).
- return userIt != allocValue.user_end() ? *userIt : nullptr;
-}
diff --git a/mlir/lib/Transforms/BufferDeallocation.cpp b/mlir/lib/Transforms/BufferDeallocation.cpp
index 3ba744d8e6ef..aa837cb0e77c 100644
--- a/mlir/lib/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Transforms/BufferDeallocation.cpp
@@ -7,15 +7,16 @@
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
-// Furthermore, buffer deallocation also adds required new clone operations to
-// ensure that all buffers are deallocated. The main class is the
+// Furthermore, buffer placement also adds required new alloc and copy
+// operations to ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// significantly important to put them into the correct blocks. However, the
// liveness analysis does not pay attention to aliases, which can occur due to
// branches (and their associated block arguments) in general. For this purpose,
// BufferDeallocation firstly finds all possible aliases for a single value
-// (using the BufferAliasAnalysis class). Consider the following example:
+// (using the BufferAliasAnalysis class). Consider the following
+// example:
//
// ^bb0(%arg0):
// cond_br %cond, ^bb1, ^bb2
@@ -29,16 +30,16 @@
//
// We should place the dealloc for %new_value in exit. However, we have to free
// the buffer in the same block, because it cannot be freed in the post
-// dominator. However, this requires a new clone buffer for %arg1 that will
+// dominator. However, this requires a new copy buffer for %arg1 that will
// contain the actual contents. Using the class BufferAliasAnalysis, we
// will find out that %new_value has a potential alias %arg1. In order to find
// the dealloc position we have to find all potential aliases, iterate over
// their uses and find the common post-dominator block (note that additional
-// clones and buffers remove potential aliases and will influence the placement
+// copies and buffers remove potential aliases and will influence the placement
// of the deallocs). In all cases, the computed block can be safely used to free
// the %new_value buffer (may be exit or bb2) as it will die and we can use
// liveness information to determine the exact operation after which we have to
-// insert the dealloc. However, the algorithm supports introducing clone buffers
+// insert the dealloc. However, the algorithm supports introducing copy buffers
// and placing deallocs in safe locations to ensure that all buffers will be
// freed in the end.
//
@@ -51,8 +52,10 @@
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
@@ -184,25 +187,25 @@ class Backedges {
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
-/// introduce clones that in turn leads to additional deallocations.
+/// introduce copies that in turn lead to additional allocs and de-allocations.
class BufferDeallocation : BufferPlacementTransformationBase {
public:
BufferDeallocation(Operation *op)
: BufferPlacementTransformationBase(op), dominators(op),
postDominators(op) {}
- /// Performs the actual placement/creation of all temporary clone and dealloc
- /// nodes.
+ /// Performs the actual placement/creation of all temporary alloc, copy and
+ /// dealloc nodes.
void deallocate() {
- // Add additional clones that are required.
- introduceClones();
+ // Add additional allocations and copies that are required.
+ introduceCopies();
// Place deallocations for all allocation entries.
placeDeallocs();
}
private:
- /// Introduces required clone operations to avoid memory leaks.
- void introduceClones() {
+ /// Introduces required allocs and copy operations to avoid memory leaks.
+ void introduceCopies() {
// Initialize the set of values that require a dedicated memory free
// operation since their operands cannot be safely deallocated in a post
// dominator.
@@ -211,7 +214,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
SmallVector<std::tuple<Value, Block *>, 8> toProcess;
// Check dominance relation for proper dominance properties. If the given
- // value node does not dominate an alias, we will have to create a clone in
+ // value node does not dominate an alias, we will have to create a copy in
// order to free all buffers that can potentially leak into a post
// dominator.
auto findUnsafeValues = [&](Value source, Block *definingBlock) {
@@ -252,7 +255,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
// arguments at the correct locations.
aliases.remove(valuesToFree);
- // Add new allocs and additional clone operations.
+ // Add new allocs and additional copy operations.
for (Value value : valuesToFree) {
if (auto blockArg = value.dyn_cast<BlockArgument>())
introduceBlockArgCopy(blockArg);
@@ -266,7 +269,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
}
}
- /// Introduces temporary clones in all predecessors and copies the source
+ /// Introduces temporary allocs in all predecessors and copies the source
/// values into the newly allocated buffers.
void introduceBlockArgCopy(BlockArgument blockArg) {
// Allocate a buffer for the current block argument in the block of
@@ -282,9 +285,9 @@ class BufferDeallocation : BufferPlacementTransformationBase {
Value sourceValue =
branchInterface.getSuccessorOperands(it.getSuccessorIndex())
.getValue()[blockArg.getArgNumber()];
- // Create a new clone at the current location of the terminator.
- Value clone = introduceCloneBuffers(sourceValue, terminator);
- // Wire new clone and successor operand.
+ // Create a new alloc and copy at the current location of the terminator.
+ Value alloc = introduceBufferCopy(sourceValue, terminator);
+ // Wire new alloc and successor operand.
auto mutableOperands =
branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
if (!mutableOperands.hasValue())
@@ -293,7 +296,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
else
mutableOperands.getValue()
.slice(blockArg.getArgNumber(), 1)
- .assign(clone);
+ .assign(alloc);
}
// Check whether the block argument has implicitly defined predecessors via
@@ -307,7 +310,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
!(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
return;
- introduceClonesForRegionSuccessors(
+ introduceCopiesForRegionSuccessors(
regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
[&](RegionSuccessor &successorRegion) {
// Find a predecessor of our argRegion.
@@ -315,7 +318,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
});
// Check whether the block argument belongs to an entry region of the
- // parent operation. In this case, we have to introduce an additional clone
+ // parent operation. In this case, we have to introduce an additional copy
// for the buffer that is passed to the argument.
SmallVector<RegionSuccessor, 2> successorRegions;
regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions);
@@ -326,20 +329,20 @@ class BufferDeallocation : BufferPlacementTransformationBase {
if (it == successorRegions.end())
return;
- // Determine the actual operand to introduce a clone for and rewire the
- // operand to point to the clone instead.
+ // Determine the actual operand to introduce a copy for and rewire the
+ // operand to point to the copy instead.
Value operand =
regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
[llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
- Value clone = introduceCloneBuffers(operand, parentOp);
+ Value copy = introduceBufferCopy(operand, parentOp);
auto op = llvm::find(parentOp->getOperands(), operand);
assert(op != parentOp->getOperands().end() &&
"parentOp does not contain operand");
- parentOp->setOperand(op.getIndex(), clone);
+ parentOp->setOperand(op.getIndex(), copy);
}
- /// Introduces temporary clones in front of all associated nested-region
+ /// Introduces temporary allocs in front of all associated nested-region
/// terminators and copies the source values into the newly allocated buffers.
void introduceValueCopyForRegionResult(Value value) {
// Get the actual result index in the scope of the parent terminator.
@@ -351,20 +354,20 @@ class BufferDeallocation : BufferPlacementTransformationBase {
// its parent operation.
return !successorRegion.getSuccessor();
};
- // Introduce a clone for all region "results" that are returned to the
- // parent operation. This is required since the parent's result value has
- // been considered critical. Therefore, the algorithm assumes that a clone
- // of a previously allocated buffer is returned by the operation (like in
- // the case of a block argument).
- introduceClonesForRegionSuccessors(regionInterface, operation->getRegions(),
+ // Introduce a copy for all region "results" that are returned to the parent
+ // operation. This is required since the parent's result value has been
+ // considered critical. Therefore, the algorithm assumes that a copy of a
+ // previously allocated buffer is returned by the operation (like in the
+ // case of a block argument).
+ introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(),
value, regionPredicate);
}
- /// Introduces buffer clones for all terminators in the given regions. The
+ /// Introduces buffer copies for all terminators in the given regions. The
/// regionPredicate is applied to every successor region in order to restrict
- /// the clones to specific regions.
+ /// the copies to specific regions.
template <typename TPredicate>
- void introduceClonesForRegionSuccessors(
+ void introduceCopiesForRegionSuccessors(
RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
Value argValue, const TPredicate ®ionPredicate) {
for (Region ®ion : regions) {
@@ -390,37 +393,49 @@ class BufferDeallocation : BufferPlacementTransformationBase {
walkReturnOperations(®ion, [&](Operation *terminator) {
// Extract the source value from the current terminator.
Value sourceValue = terminator->getOperand(operandIndex);
- // Create a new clone at the current location of the terminator.
- Value clone = introduceCloneBuffers(sourceValue, terminator);
- // Wire clone and terminator operand.
- terminator->setOperand(operandIndex, clone);
+ // Create a new alloc at the current location of the terminator.
+ Value alloc = introduceBufferCopy(sourceValue, terminator);
+ // Wire alloc and terminator operand.
+ terminator->setOperand(operandIndex, alloc);
});
}
}
- /// Creates a new memory allocation for the given source value and clones
+ /// Creates a new memory allocation for the given source value and copies
/// its content into the newly allocated buffer. The terminator operation is
- /// used to insert the clone operation at the right place.
- Value introduceCloneBuffers(Value sourceValue, Operation *terminator) {
- // Avoid multiple clones of the same source value. This can happen in the
+ /// used to insert the alloc and copy operations at the right places.
+ Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
+ // Avoid multiple copies of the same source value. This can happen in the
// presence of loops when a branch acts as a backedge while also having
// another successor that returns to its parent operation. Note that
// copying copied buffers can introduce memory leaks since the invariant of
- // BufferDeallocation assumes that a buffer will be only cloned once into a
- // temporary buffer. Hence, the construction of clone chains introduces
+ // BufferPlacement assumes that a buffer will be only copied once into a
+ // temporary buffer. Hence, the construction of copy chains introduces
// additional allocations that are not tracked automatically by the
// algorithm.
- if (clonedValues.contains(sourceValue))
+ if (copiedValues.contains(sourceValue))
return sourceValue;
- // Create a new clone operation that copies the contents of the old
- // buffer to the new one.
+ // Create a new alloc at the current location of the terminator.
+ auto memRefType = sourceValue.getType().cast<MemRefType>();
OpBuilder builder(terminator);
- auto cloneOp =
- builder.create<memref::CloneOp>(terminator->getLoc(), sourceValue);
- // Remember the clone of original source value.
- clonedValues.insert(cloneOp);
- return cloneOp;
+  // Handle dynamically shaped types by extracting their dynamic dimensions.
+ auto dynamicOperands =
+ getDynOperands(terminator->getLoc(), sourceValue, builder);
+
+ // TODO: provide a generic interface to create dialect-specific
+ // Alloc and CopyOp nodes.
+ auto alloc = builder.create<memref::AllocOp>(terminator->getLoc(),
+ memRefType, dynamicOperands);
+
+  // Create a new copy operation that copies the contents of the old
+ // allocation to the new one.
+ builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);
+
+ // Remember the copy of original source value.
+ copiedValues.insert(alloc);
+ return alloc;
}
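  // Sketch (assumption, not part of this commit): `getDynOperands` from
  // StandardOps/Utils conceptually materializes one index value per dynamic
  // dimension of `sourceValue`, roughly:
  //   SmallVector<Value, 4> dynOperands;
  //   for (auto dim : llvm::enumerate(memRefType.getShape()))
  //     if (dim.value() == ShapedType::kDynamicSize)
  //       dynOperands.push_back(builder.createOrFold<DimOp>(
  //           terminator->getLoc(), sourceValue, dim.index()));
  // This yields the `constant`/`dim` pairs visible in the documentation
  // examples above.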
/// Finds correct dealloc positions according to the algorithm described at
@@ -498,8 +513,8 @@ class BufferDeallocation : BufferPlacementTransformationBase {
/// position.
PostDominanceInfo postDominators;
- /// Stores already cloned buffers to avoid additional clones of clones.
- ValueSetT clonedValues;
+ /// Stores already copied allocations to avoid additional copies of copies.
+ ValueSetT copiedValues;
};
//===----------------------------------------------------------------------===//
@@ -507,8 +522,8 @@ class BufferDeallocation : BufferPlacementTransformationBase {
//===----------------------------------------------------------------------===//
/// The actual buffer deallocation pass that inserts and moves dealloc nodes
-/// into the right positions. Furthermore, it inserts additional clones if
-/// necessary. It uses the algorithm described at the top of the file.
+/// into the right positions. Furthermore, it inserts additional allocs and
+/// copies if necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
void runOnFunction() override {
@@ -525,7 +540,7 @@ struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
return signalPassFailure();
}
- // Place all required temporary clone and dealloc nodes.
+ // Place all required temporary alloc, copy and dealloc nodes.
BufferDeallocation deallocation(getFunction());
deallocation.deallocate();
}
diff --git a/mlir/lib/Transforms/BufferUtils.cpp b/mlir/lib/Transforms/BufferUtils.cpp
index 0cefd53d2d34..ab39f57b3fcc 100644
--- a/mlir/lib/Transforms/BufferUtils.cpp
+++ b/mlir/lib/Transforms/BufferUtils.cpp
@@ -12,7 +12,7 @@
#include "mlir/Transforms/BufferUtils.h"
#include "PassDetail.h"
-#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
+#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
@@ -49,6 +49,25 @@ Operation *BufferPlacementAllocs::getStartOperation(Value allocValue,
return startOperation;
}
+/// Finds associated deallocs that can be linked to our allocation nodes (if
+/// any).
+Operation *BufferPlacementAllocs::findDealloc(Value allocValue) {
+ auto userIt = llvm::find_if(allocValue.getUsers(), [&](Operation *user) {
+ auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
+ if (!effectInterface)
+ return false;
+ // Try to find a free effect that is applied to one of our values
+ // that will be automatically freed by our pass.
+ SmallVector<MemoryEffects::EffectInstance, 2> effects;
+ effectInterface.getEffectsOnValue(allocValue, effects);
+ return llvm::any_of(effects, [&](MemoryEffects::EffectInstance &it) {
+ return isa<MemoryEffects::Free>(it.getEffect());
+ });
+ });
+ // Assign the associated dealloc operation (if any).
+ return userIt != allocValue.user_end() ? *userIt : nullptr;
+}
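// Hypothetical usage sketch (not part of this commit): a caller holding an
// allocated value can pair it with its matching free operation:
//   if (Operation *dealloc = BufferPlacementAllocs::findDealloc(allocValue))
//     deallocs.push_back(dealloc);
// Any user implementing MemoryEffectOpInterface with a Free effect on the
// value qualifies, e.g. `memref.dealloc`.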
+
/// Initializes the internal list by discovering all supported allocation
/// nodes.
BufferPlacementAllocs::BufferPlacementAllocs(Operation *op) { build(op); }
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 2b185fcf0b7e..36f9e5b832be 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -7,6 +7,7 @@ add_mlir_library(MLIRTransforms
BufferUtils.cpp
Bufferize.cpp
Canonicalizer.cpp
+ CopyRemoval.cpp
CSE.cpp
Inliner.cpp
LocationSnapshot.cpp
diff --git a/mlir/lib/Transforms/CopyRemoval.cpp b/mlir/lib/Transforms/CopyRemoval.cpp
new file mode 100644
index 000000000000..c5a8da632956
--- /dev/null
+++ b/mlir/lib/Transforms/CopyRemoval.cpp
@@ -0,0 +1,217 @@
+//===- CopyRemoval.cpp - Removing the redundant copies --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/CopyOpInterface.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+using namespace MemoryEffects;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// CopyRemovalPass
+//===----------------------------------------------------------------------===//
+
+/// This pass removes redundant copy operations. When a copy operation is
+/// erased, the leftover defining (allocation) and deallocation operations of
+/// the now-unused buffer are removed as well.
+class CopyRemovalPass : public PassWrapper<CopyRemovalPass, OperationPass<>> {
+public:
+ void runOnOperation() override {
+ getOperation()->walk([&](CopyOpInterface copyOp) {
+ reuseCopySourceAsTarget(copyOp);
+ reuseCopyTargetAsSource(copyOp);
+ });
+ for (std::pair<Value, Value> &pair : replaceList)
+ pair.first.replaceAllUsesWith(pair.second);
+ for (Operation *op : eraseList)
+ op->erase();
+ }
+
+private:
+ /// List of operations that need to be removed.
+ llvm::SmallPtrSet<Operation *, 4> eraseList;
+
+ /// List of values that need to be replaced with their counterparts.
+ llvm::SmallDenseSet<std::pair<Value, Value>, 4> replaceList;
+
+  /// Returns the allocation operation for `value` in `block` if it exists,
+  /// nullptr otherwise.
+ Operation *getAllocationOpInBlock(Value value, Block *block) {
+ assert(block && "Block cannot be null");
+ Operation *op = value.getDefiningOp();
+ if (op && op->getBlock() == block) {
+ auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+ if (effects && effects.hasEffect<Allocate>())
+ return op;
+ }
+ return nullptr;
+ }
+
+ /// Returns the deallocation operation for `value` in `block` if it exists,
+ /// nullptr otherwise.
+ Operation *getDeallocationOpInBlock(Value value, Block *block) {
+ assert(block && "Block cannot be null");
+ auto valueUsers = value.getUsers();
+ auto it = llvm::find_if(valueUsers, [&](Operation *op) {
+ auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+ return effects && op->getBlock() == block && effects.hasEffect<Free>();
+ });
+ return (it == valueUsers.end() ? nullptr : *it);
+ }
+
+ /// Returns true if any operation between `start` and `end` has a memory
+ /// effect.
+ bool hasMemoryEffectOpBetween(Operation *start, Operation *end) {
+ assert(start && end && "Start and end operations cannot be null");
+ assert(start->getBlock() == end->getBlock() &&
+ "Start and end operations should be in the same block.");
+ Operation *op = start->getNextNode();
+ while (op->isBeforeInBlock(end)) {
+ if (isa<MemoryEffectOpInterface>(op))
+ return true;
+ op = op->getNextNode();
+ }
+ return false;
+ }
+
+ /// Returns true if `val` has at least one user between the `start` and
+ /// `end` operations.
+ bool hasUsersBetween(Value val, Operation *start, Operation *end) {
+ assert(start && end && "Start and end operations cannot be null");
+ Block *block = start->getBlock();
+ assert(block == end->getBlock() &&
+ "Start and end operations should be in the same block.");
+ return llvm::any_of(val.getUsers(), [&](Operation *op) {
+ return op->getBlock() == block && start->isBeforeInBlock(op) &&
+ op->isBeforeInBlock(end);
+ });
+ }
+
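+ /// Returns true if all given operations lie in the same block.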
+ bool areOpsInTheSameBlock(ArrayRef<Operation *> operations) {
+ assert(!operations.empty() &&
+ "The operations list should contain at least one operation");
+ Block *block = operations.front()->getBlock();
+ return llvm::none_of(
+ operations, [&](Operation *op) { return block != op->getBlock(); });
+ }
+
+ /// Input:
+ /// func(){
+ /// %from = alloc()
+ /// write_to(%from)
+ /// %to = alloc()
+ /// copy(%from,%to)
+ /// dealloc(%from)
+ /// return %to
+ /// }
+ ///
+ /// Output:
+ /// func(){
+ /// %from = alloc()
+ /// write_to(%from)
+ /// return %from
+ /// }
+ /// Constraints:
+ /// 1) %to, the copy and the dealloc must all be defined and lie in the same
+ /// block.
+ /// 2) This transformation cannot be applied if there is any user/alias of
+ /// the `to` value between the defining operation of `to` and the copy
+ /// operation.
+ /// 3) This transformation cannot be applied if there is any user/alias of
+ /// the `from` value between the copy operation and the deallocation of
+ /// `from`.
+ /// TODO: Alias analysis is not available at the moment. Currently, we check
+ /// if there are any operations with memory effects between copy and
+ /// deallocation operations.
+ void reuseCopySourceAsTarget(CopyOpInterface copyOp) {
+ if (eraseList.count(copyOp))
+ return;
+
+ Value from = copyOp.getSource();
+ Value to = copyOp.getTarget();
+
+ Operation *copy = copyOp.getOperation();
+ Block *copyBlock = copy->getBlock();
+ Operation *fromDefiningOp = from.getDefiningOp();
+ Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock);
+ Operation *toDefiningOp = getAllocationOpInBlock(to, copyBlock);
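+ // Bail out unless all of the constraints documented above hold.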
+ if (!fromDefiningOp || !fromFreeingOp || !toDefiningOp ||
+ !areOpsInTheSameBlock({fromFreeingOp, toDefiningOp, copy}) ||
+ hasUsersBetween(to, toDefiningOp, copy) ||
+ hasUsersBetween(from, copy, fromFreeingOp) ||
+ hasMemoryEffectOpBetween(copy, fromFreeingOp))
+ return;
+
+ replaceList.insert({to, from});
+ eraseList.insert(copy);
+ eraseList.insert(toDefiningOp);
+ eraseList.insert(fromFreeingOp);
+ }
+
+ /// Input:
+ /// func(){
+ /// %to = alloc()
+ /// %from = alloc()
+ /// write_to(%from)
+ /// copy(%from,%to)
+ /// dealloc(%from)
+ /// return %to
+ /// }
+ ///
+ /// Output:
+ /// func(){
+ /// %to = alloc()
+ /// write_to(%to)
+ /// return %to
+ /// }
+ /// Constraints:
+ /// 1) %from, the copy and the dealloc must all be defined and lie in the
+ /// same block.
+ /// 2) This transformation cannot be applied if there is any user/alias of
+ /// the `to` value between the defining operation of `from` and the copy
+ /// operation.
+ /// 3) This transformation cannot be applied if there is any user/alias of
+ /// the `from` value between the copy operation and the deallocation of
+ /// `from`.
+ /// TODO: Alias analysis is not available at the moment. Currently, we check
+ /// if there are any operations with memory effects between copy and
+ /// deallocation operations.
+ void reuseCopyTargetAsSource(CopyOpInterface copyOp) {
+ if (eraseList.count(copyOp))
+ return;
+
+ Value from = copyOp.getSource();
+ Value to = copyOp.getTarget();
+
+ Operation *copy = copyOp.getOperation();
+ Block *copyBlock = copy->getBlock();
+ Operation *fromDefiningOp = getAllocationOpInBlock(from, copyBlock);
+ Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock);
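+ // As above, bail out unless all of the documented constraints hold.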
+ if (!fromDefiningOp || !fromFreeingOp ||
+ !areOpsInTheSameBlock({fromFreeingOp, fromDefiningOp, copy}) ||
+ hasUsersBetween(to, fromDefiningOp, copy) ||
+ hasUsersBetween(from, copy, fromFreeingOp) ||
+ hasMemoryEffectOpBetween(copy, fromFreeingOp))
+ return;
+
+ replaceList.insert({from, to});
+ eraseList.insert(copy);
+ eraseList.insert(fromDefiningOp);
+ eraseList.insert(fromFreeingOp);
+ }
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// CopyRemovalPass construction
+//===----------------------------------------------------------------------===//
+
+std::unique_ptr<Pass> mlir::createCopyRemovalPass() {
+ return std::make_unique<CopyRemovalPass>();
+}
diff --git a/mlir/test/Transforms/buffer-deallocation.mlir b/mlir/test/Transforms/buffer-deallocation.mlir
index 35f7bbf79c8f..25197d14fba7 100644
--- a/mlir/test/Transforms/buffer-deallocation.mlir
+++ b/mlir/test/Transforms/buffer-deallocation.mlir
@@ -30,11 +30,13 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
}
// CHECK-NEXT: cond_br
-// CHECK: %[[ALLOC0:.*]] = memref.clone
+// CHECK: %[[ALLOC0:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
-// CHECK: %[[ALLOC1:.*]] = memref.alloc
+// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[ALLOC1]]
+// CHECK: %[[ALLOC2:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3(%[[ALLOC2]]
// CHECK: test.copy
@@ -75,12 +77,16 @@ func @condBranchDynamicType(
}
// CHECK-NEXT: cond_br
-// CHECK: %[[ALLOC0:.*]] = memref.clone
+// CHECK: %[[DIM0:.*]] = memref.dim
+// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[DIM0]])
+// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]])
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ALLOC1]]
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc(%[[DIM1]])
+// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]])
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
@@ -136,10 +142,12 @@ func @condBranchDynamicTypeNested(
return
}
-// CHECK-NEXT: cond_br{{.*}}
-// CHECK-NEXT: ^bb1
-// CHECK-NEXT: %[[ALLOC0:.*]] = memref.clone
-// CHECK-NEXT: br ^bb6(%[[ALLOC0]]
+// CHECK-NEXT: cond_br
+// CHECK: ^bb1
+// CHECK: %[[DIM0:.*]] = memref.dim
+// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[DIM0]])
+// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]])
+// CHECK-NEXT: br ^bb6
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
@@ -149,7 +157,9 @@ func @condBranchDynamicTypeNested(
// CHECK: ^bb4:
// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
-// CHECK-NEXT: %[[ALLOC3:.*]] = memref.clone %[[ALLOC2]]
+// CHECK: %[[DIM2:.*]] = memref.dim %[[ALLOC2]]
+// CHECK-NEXT: %[[ALLOC3:.*]] = memref.alloc(%[[DIM2]])
+// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC3]])
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}})
// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
@@ -198,11 +208,13 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
return
}
-// CHECK-NEXT: %[[ALLOC0:.*]] = memref.clone
+// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: cond_br
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[ALLOC1]]
+// CHECK: %[[ALLOC2:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
@@ -407,17 +419,20 @@ func @moving_alloc_and_inserting_missing_dealloc(
return
}
-// CHECK-NEXT: cond_br{{.*}}
-// CHECK-NEXT: ^bb1
+// CHECK-NEXT: cond_br
+// CHECK: ^bb1
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %[[ALLOC0]]
+// CHECK: %[[ALLOC1:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: br ^bb3(%[[ALLOC1]]
// CHECK-NEXT: ^bb2
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK-NEXT: %[[ALLOC3:.*]] = memref.clone %[[ALLOC2]]
+// CHECK: %[[ALLOC3:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: br ^bb3(%[[ALLOC3]]
// CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}})
@@ -530,7 +545,8 @@ func @nested_regions_and_cond_branch(
}
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
-// CHECK: %[[ALLOC0:.*]] = memref.clone %[[ARG1]]
+// CHECK: %[[ALLOC0:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]])
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
@@ -538,11 +554,12 @@ func @nested_regions_and_cond_branch(
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: %{{.*}} = math.exp
-// CHECK: %[[ALLOC3:.*]] = memref.clone %[[ALLOC1]]
+// CHECK: %[[ALLOC3:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC3]])
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
-// CHECK-NEXT: memref.dealloc
+// CHECK-NEXT: dealloc
// -----
@@ -624,10 +641,12 @@ func @nested_region_control_flow_div(
// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[ALLOC0]]
+// CHECK: %[[ALLOC2:.*]] = memref.alloc
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC2]])
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc(%arg0, %arg1)
-// CHECK-NEXT: %[[ALLOC4:.*]] = memref.clone %[[ALLOC3]]
+// CHECK: %[[ALLOC4:.*]] = memref.alloc
+// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: memref.dealloc %[[ALLOC0]]
@@ -804,18 +823,20 @@ func @nestedRegionsAndCondBranchAlloca(
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: ^[[BB1]]:
-// CHECK: %[[ALLOC0:.*]] = memref.clone
+// CHECK: %[[ALLOC0:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]]
// CHECK: %{{.*}} = math.exp
-// CHECK: %[[ALLOC2:.*]] = memref.clone %[[ALLOC1]]
+// CHECK: %[[ALLOC2:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
-// CHECK-NEXT: memref.dealloc
+// CHECK-NEXT: dealloc
// -----
@@ -867,13 +888,15 @@ func @loop_alloc(
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
-// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %arg3
+// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc()
+// CHECK: linalg.copy(%arg3, %[[ALLOC1]])
// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: cmpi
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc()
-// CHECK: %[[ALLOC4:.*]] = memref.clone %[[ALLOC3]]
+// CHECK: %[[ALLOC4:.*]] = memref.alloc()
+// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: }
@@ -951,21 +974,25 @@ func @loop_nested_if_alloc(
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %arg3
+// CHECK: %[[ALLOC1:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = scf.if
// CHECK: %[[ALLOC4:.*]] = memref.alloc()
-// CHECK-NEXT: %[[ALLOC5:.*]] = memref.clone %[[ALLOC4]]
+// CHECK-NEXT: %[[ALLOC5:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]])
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: scf.yield %[[ALLOC5]]
-// CHECK: %[[ALLOC6:.*]] = memref.clone %[[ALLOC0]]
+// CHECK: %[[ALLOC6:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]])
// CHECK-NEXT: scf.yield %[[ALLOC6]]
-// CHECK: %[[ALLOC7:.*]] = memref.clone %[[ALLOC3]]
+// CHECK: %[[ALLOC7:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC7]])
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
@@ -1013,14 +1040,17 @@ func @loop_nested_alloc(
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
-// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %arg3
+// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]])
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[IALLOC0]]
+// CHECK: %[[ALLOC2:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]])
// CHECK-NEXT: memref.dealloc %[[IALLOC0]]
// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]])
-// CHECK-NEXT: %[[ALLOC5:.*]] = memref.clone %[[IALLOC1]]
+// CHECK: %[[ALLOC5:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]])
// CHECK-NEXT: memref.dealloc %[[IALLOC1]]
// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args
@@ -1030,23 +1060,28 @@ func @loop_nested_alloc(
// CHECK: %[[ALLOC9:.*]] = scf.if
// CHECK: %[[ALLOC11:.*]] = memref.alloc()
-// CHECK-NEXT: %[[ALLOC12:.*]] = memref.clone %[[ALLOC11]]
+// CHECK-NEXT: %[[ALLOC12:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]])
// CHECK-NEXT: memref.dealloc %[[ALLOC11]]
// CHECK-NEXT: scf.yield %[[ALLOC12]]
-// CHECK: %[[ALLOC13:.*]] = memref.clone %[[IALLOC2]]
+// CHECK: %[[ALLOC13:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]])
// CHECK-NEXT: scf.yield %[[ALLOC13]]
// CHECK: memref.dealloc %[[IALLOC2]]
-// CHECK-NEXT: %[[ALLOC10:.*]] = memref.clone %[[ALLOC9]]
+// CHECK-NEXT: %[[ALLOC10:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]])
// CHECK-NEXT: memref.dealloc %[[ALLOC9]]
// CHECK-NEXT: scf.yield %[[ALLOC10]]
-// CHECK: %[[ALLOC7:.*]] = memref.clone %[[ALLOC6]]
+// CHECK: %[[ALLOC7:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]])
// CHECK-NEXT: memref.dealloc %[[ALLOC6]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
-// CHECK: %[[ALLOC4:.*]] = memref.clone %[[ALLOC3]]
+// CHECK: %[[ALLOC4:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC4]]
@@ -1148,7 +1183,8 @@ func @assumingOp(
// CHECK-NEXT: shape.assuming_yield %[[ARG1]]
// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[TMP_ALLOC:.*]] = memref.alloc()
-// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = memref.clone %[[TMP_ALLOC]]
+// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[TMP_ALLOC]], %[[RETURNING_ALLOC]])
// CHECK-NEXT: memref.dealloc %[[TMP_ALLOC]]
// CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]]
// CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]])
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index e54135f21b4b..e1869ac58f52 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -1120,87 +1120,3 @@ func @fold_trunci_sexti(%arg0: i1) -> i1 attributes {} {
%1 = trunci %0 : i8 to i1
return %1 : i1
}
-
-// CHECK-LABEL: func @simple_clone_elimination
-func @simple_clone_elimination() -> memref<5xf32> {
- %ret = memref.alloc() : memref<5xf32>
- %temp = memref.clone %ret : memref<5xf32> to memref<5xf32>
- memref.dealloc %temp : memref<5xf32>
- return %ret : memref<5xf32>
-}
-// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
-// CHECK-NOT: %[[temp:.*]] = memref.clone
-// CHECK-NOT: memref.dealloc %[[temp]]
-// CHECK: return %[[ret]]
-
-// -----
-
-// CHECK-LABEL: func @clone_loop_alloc
-func @clone_loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
- %0 = memref.alloc() : memref<2xf32>
- memref.dealloc %0 : memref<2xf32>
- %1 = memref.clone %arg3 : memref<2xf32> to memref<2xf32>
- %2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) {
- %3 = cmpi eq, %arg5, %arg1 : index
- memref.dealloc %arg6 : memref<2xf32>
- %4 = memref.alloc() : memref<2xf32>
- %5 = memref.clone %4 : memref<2xf32> to memref<2xf32>
- memref.dealloc %4 : memref<2xf32>
- %6 = memref.clone %5 : memref<2xf32> to memref<2xf32>
- memref.dealloc %5 : memref<2xf32>
- scf.yield %6 : memref<2xf32>
- }
- linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32>
- memref.dealloc %2 : memref<2xf32>
- return
-}
-
-// CHECK-NEXT: %[[ALLOC0:.*]] = memref.clone
-// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for
-// CHECK-NEXT: memref.dealloc
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc
-// CHECK-NEXT: scf.yield %[[ALLOC2]]
-// CHECK: linalg.copy(%[[ALLOC1]]
-// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
-
-// -----
-
-// CHECK-LABEL: func @clone_nested_region
-func @clone_nested_region(%arg0: index, %arg1: index) -> memref<?x?xf32> {
- %0 = cmpi eq, %arg0, %arg1 : index
- %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
- %2 = scf.if %0 -> (memref<?x?xf32>) {
- %3 = scf.if %0 -> (memref<?x?xf32>) {
- %9 = memref.clone %1 : memref<?x?xf32> to memref<?x?xf32>
- scf.yield %9 : memref<?x?xf32>
- } else {
- %7 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
- %10 = memref.clone %7 : memref<?x?xf32> to memref<?x?xf32>
- memref.dealloc %7 : memref<?x?xf32>
- scf.yield %10 : memref<?x?xf32>
- }
- %6 = memref.clone %3 : memref<?x?xf32> to memref<?x?xf32>
- memref.dealloc %3 : memref<?x?xf32>
- scf.yield %6 : memref<?x?xf32>
- } else {
- %3 = memref.alloc(%arg1, %arg1) : memref<?x?xf32>
- %6 = memref.clone %3 : memref<?x?xf32> to memref<?x?xf32>
- memref.dealloc %3 : memref<?x?xf32>
- scf.yield %6 : memref<?x?xf32>
- }
- memref.dealloc %1 : memref<?x?xf32>
- return %2 : memref<?x?xf32>
-}
-
-// CHECK: %[[ALLOC1:.*]] = memref.alloc
-// CHECK-NEXT: %[[ALLOC2:.*]] = scf.if
-// CHECK-NEXT: %[[ALLOC3_1:.*]] = scf.if
-// CHECK-NEXT: %[[ALLOC4_1:.*]] = memref.clone %[[ALLOC1]]
-// CHECK-NEXT: scf.yield %[[ALLOC4_1]]
-// CHECK: %[[ALLOC4_2:.*]] = memref.alloc
-// CHECK-NEXT: scf.yield %[[ALLOC4_2]]
-// CHECK: scf.yield %[[ALLOC3_1]]
-// CHECK: %[[ALLOC3_2:.*]] = memref.alloc
-// CHECK-NEXT: scf.yield %[[ALLOC3_2]]
-// CHECK: memref.dealloc %[[ALLOC1]]
-// CHECK-NEXT: return %[[ALLOC2]]
diff --git a/mlir/test/Transforms/copy-removal.mlir b/mlir/test/Transforms/copy-removal.mlir
new file mode 100644
index 000000000000..a91c5c2b9528
--- /dev/null
+++ b/mlir/test/Transforms/copy-removal.mlir
@@ -0,0 +1,361 @@
+// RUN: mlir-opt -copy-removal -split-input-file %s | FileCheck %s
+
+// All linalg copies except linalg.copy(%1, %9) must be removed; that copy
+// stays because the defining operation of %1 and its DeallocOp live in
+// another block.
+
+// CHECK-LABEL: func @nested_region_control_flow_div_nested
+func @nested_region_control_flow_div_nested(%arg0: index, %arg1: index) -> memref<?x?xf32> {
+ %0 = cmpi eq, %arg0, %arg1 : index
+ %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ // CHECK: %{{.*}} = scf.if
+ %2 = scf.if %0 -> (memref<?x?xf32>) {
+ // CHECK: %[[PERCENT3:.*]] = scf.if
+ %3 = scf.if %0 -> (memref<?x?xf32>) {
+ %c0_0 = constant 0 : index
+ %7 = memref.dim %1, %c0_0 : memref<?x?xf32>
+ %c1_1 = constant 1 : index
+ %8 = memref.dim %1, %c1_1 : memref<?x?xf32>
+ %9 = memref.alloc(%7, %8) : memref<?x?xf32>
+ // CHECK: linalg.copy({{.*}}, %[[PERCENT9:.*]])
+ linalg.copy(%1, %9) : memref<?x?xf32>, memref<?x?xf32>
+ // CHECK: scf.yield %[[PERCENT9]]
+ scf.yield %9 : memref<?x?xf32>
+ } else {
+ // CHECK: %[[PERCENT7:.*]] = memref.alloc
+ %7 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+ %c0_0 = constant 0 : index
+ %8 = memref.dim %7, %c0_0 : memref<?x?xf32>
+ %c1_1 = constant 1 : index
+ %9 = memref.dim %7, %c1_1 : memref<?x?xf32>
+ // CHECK-NOT: %{{.*}} = memref.alloc
+ // CHECK-NOT: linalg.copy(%[[PERCENT7]], %{{.*}})
+ // CHECK-NOT: memref.dealloc %[[PERCENT7]]
+ %10 = memref.alloc(%8, %9) : memref<?x?xf32>
+ linalg.copy(%7, %10) : memref<?x?xf32>, memref<?x?xf32>
+ memref.dealloc %7 : memref<?x?xf32>
+ // CHECK: scf.yield %[[PERCENT7]]
+ scf.yield %10 : memref<?x?xf32>
+ }
+ %c0 = constant 0 : index
+ %4 = memref.dim %3, %c0 : memref<?x?xf32>
+ %c1 = constant 1 : index
+ %5 = memref.dim %3, %c1 : memref<?x?xf32>
+ // CHECK-NOT: %{{.*}} = memref.alloc
+ // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}})
+ // CHECK-NOT: memref.dealloc %[[PERCENT3]]
+ %6 = memref.alloc(%4, %5) : memref<?x?xf32>
+ linalg.copy(%3, %6) : memref<?x?xf32>, memref<?x?xf32>
+ memref.dealloc %3 : memref<?x?xf32>
+ // CHECK: scf.yield %[[PERCENT3]]
+ scf.yield %6 : memref<?x?xf32>
+ } else {
+ // CHECK: %[[PERCENT3:.*]] = memref.alloc
+ %3 = memref.alloc(%arg1, %arg1) : memref<?x?xf32>
+ %c0 = constant 0 : index
+ %4 = memref.dim %3, %c0 : memref<?x?xf32>
+ %c1 = constant 1 : index
+ %5 = memref.dim %3, %c1 : memref<?x?xf32>
+ // CHECK-NOT: %{{.*}} = memref.alloc
+ // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}})
+ // CHECK-NOT: memref.dealloc %[[PERCENT3]]
+ %6 = memref.alloc(%4, %5) : memref<?x?xf32>
+ linalg.copy(%3, %6) : memref<?x?xf32>, memref<?x?xf32>
+ memref.dealloc %3 : memref<?x?xf32>
+ // CHECK: scf.yield %[[PERCENT3]]
+ scf.yield %6 : memref<?x?xf32>
+ }
+ memref.dealloc %1 : memref<?x?xf32>
+ return %2 : memref<?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @simple_test
+func @simple_test() -> memref<5xf32> {
+ %temp = memref.alloc() : memref<5xf32>
+ %ret = memref.alloc() : memref<5xf32>
+ linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+ memref.dealloc %ret : memref<5xf32>
+ return %temp : memref<5xf32>
+}
+// CHECK-SAME: () -> memref<5xf32>
+// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
+// CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
+// CHECK-NOT: memref.dealloc %[[ret]]
+// CHECK: return %[[ret]]
+
+// -----
+
+// It is legal to remove the copy operation even though %ret has a use before
+// the copy operation. The allocation of %temp and the deallocation of %ret
+// should also be removed.
+
+// CHECK-LABEL: func @test_with_ret_usage_before_copy
+func @test_with_ret_usage_before_copy() -> memref<5xf32> {
+ %ret = memref.alloc() : memref<5xf32>
+ %temp = memref.alloc() : memref<5xf32>
+ %c0 = constant 0 : index
+ %dimension = memref.dim %ret, %c0 : memref<5xf32>
+ linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+ memref.dealloc %ret : memref<5xf32>
+ return %temp : memref<5xf32>
+}
+// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
+// CHECK-NOT: %{{.*}} = memref.alloc
+// CHECK-NEXT: %{{.*}} = constant
+// CHECK-NEXT: %[[DIM:.*]] = memref.dim %[[ret]]
+// CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
+// CHECK-NOT: memref.dealloc %[[ret]]
+// CHECK: return %[[ret]]
+
+// -----
+
+// It is illegal to remove a copy operation when %ret has a use after the
+// copy operation.
+
+// CHECK-LABEL: func @test_with_ret_usage_after_copy
+func @test_with_ret_usage_after_copy() -> memref<5xf32> {
+ %ret = memref.alloc() : memref<5xf32>
+ %temp = memref.alloc() : memref<5xf32>
+ // CHECK: linalg.copy
+ linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+ %c0 = constant 0 : index
+ %dimension = memref.dim %ret, %c0 : memref<5xf32>
+ memref.dealloc %ret : memref<5xf32>
+ return %temp : memref<5xf32>
+}
+
+// -----
+
+// It is illegal to remove a copy operation when %temp has a use before the
+// copy operation.
+
+// CHECK-LABEL: func @test_with_temp_usage_before_copy
+func @test_with_temp_usage_before_copy() -> memref<5xf32> {
+ %ret = memref.alloc() : memref<5xf32>
+ %temp = memref.alloc() : memref<5xf32>
+ %c0 = constant 0 : index
+ %dimension = memref.dim %temp, %c0 : memref<5xf32>
+ // CHECK: linalg.copy
+ linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+ memref.dealloc %ret : memref<5xf32>
+ return %temp : memref<5xf32>
+}
+
+// -----
+
+// In principle it is legal to remove the copy operation even though %temp has
+// a use after the copy operation; the allocation of %temp and the
+// deallocation of %ret could also be removed.
+
+// However the following pattern is not handled by copy removal.
+// %from = memref.alloc()
+// %to = memref.alloc()
+// copy(%from, %to)
+// read_from(%from) + write_to(%something_else)
+// memref.dealloc(%from)
+// return %to
+// In particular, the linalg.generic is an operation with memory effects that
+// sits between the copy and the dealloc. Since no alias analysis is performed
+// and no distinction is made between reads and writes, its presence blocks
+// the copy removal.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @test_with_temp_usage_after_copy
+func @test_with_temp_usage_after_copy() -> memref<5xf32> {
+ %ret = memref.alloc() : memref<5xf32>
+ %res = memref.alloc() : memref<5xf32>
+ %temp = memref.alloc() : memref<5xf32>
+ linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+ linalg.generic {
+ indexing_maps = [#map0, #map0],
+ iterator_types = ["parallel"]}
+ ins(%temp : memref<5xf32>)
+ outs(%res : memref<5xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = math.exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ memref.dealloc %ret : memref<5xf32>
+ return %temp : memref<5xf32>
+}
+// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
+// CHECK-NEXT: %[[res:.*]] = memref.alloc()
+// CHECK-NEXT: %[[temp:.*]] = memref.alloc()
+// CHECK-NEXT: linalg.copy(%[[ret]], %[[temp]])
+// CHECK-NEXT: linalg.generic
+// CHECK: memref.dealloc %[[ret]]
+// CHECK: return %[[temp]]
+
+// -----
+
+// CHECK-LABEL: func @make_allocation
+func @make_allocation() -> memref<5xf32> {
+ %mem = memref.alloc() : memref<5xf32>
+ return %mem : memref<5xf32>
+}
+
+// CHECK-LABEL: func @test_with_function_call
+func @test_with_function_call() -> memref<5xf32> {
+ // CHECK-NEXT: %[[ret:.*]] = call @make_allocation() : () -> memref<5xf32>
+ %ret = call @make_allocation() : () -> (memref<5xf32>)
+ // CHECK-NOT: %{{.*}} = memref.alloc
+ // CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
+ // CHECK-NOT: memref.dealloc %[[ret]]
+ %temp = memref.alloc() : memref<5xf32>
+ linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
+ memref.dealloc %ret : memref<5xf32>
+ // CHECK: return %[[ret]]
+ return %temp : memref<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @multiple_deallocs_in_different_blocks
+func @multiple_deallocs_in_different_blocks(%cond : i1) -> memref<5xf32> {
+ // CHECK-NEXT: %[[PERCENT0:.*]] = memref.alloc()
+ %0 = memref.alloc() : memref<5xf32>
+ cond_br %cond, ^bb1, ^bb2
+^bb1:
+ memref.dealloc %0 : memref<5xf32>
+ // CHECK: br ^[[BB3:.*]](%[[PERCENT0]]
+ br ^bb3(%0 : memref<5xf32>)
+^bb2:
+ // CHECK-NOT: %{{.*}} = memref.alloc
+ // CHECK-NOT: linalg.copy(%[[PERCENT0]], %{{.*}})
+ // CHECK-NOT: memref.dealloc %[[PERCENT0]]
+ %temp = memref.alloc() : memref<5xf32>
+ linalg.copy(%0, %temp) : memref<5xf32>, memref<5xf32>
+ memref.dealloc %0 : memref<5xf32>
+ // CHECK: br ^[[BB3]](%[[PERCENT0]]
+ br ^bb3(%temp : memref<5xf32>)
+^bb3(%res : memref<5xf32>):
+ return %res : memref<5xf32>
+}
+
+// -----
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @test_ReuseCopyTargetAsSource
+func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>, %result: memref<2xf32>){
+ // CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[RES:.*]]: memref<2xf32>)
+ // CHECK-NOT: %{{.*}} = memref.alloc
+ %temp = memref.alloc() : memref<2xf32>
+ // CHECK-NEXT: linalg.generic
+ // CHECK-SAME: ins(%[[ARG0]]{{.*}}outs(%[[RES]]
+ // CHECK-NOT: linalg.copy(%{{.*}}, %[[RES]])
+ // CHECK-NOT: memref.dealloc %{{.*}}
+ linalg.generic {
+ indexing_maps = [#map0, #map0],
+ iterator_types = ["parallel"]}
+ ins(%arg0 : memref<2xf32>)
+ outs(%temp : memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = math.exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ linalg.copy(%temp, %result) : memref<2xf32>, memref<2xf32>
+ memref.dealloc %temp : memref<2xf32>
+ // CHECK: return
+ return
+}
+
+// -----
+
+// Copy operation must not be removed since an operation writes to %to value
+// before copy.
+
+#map0 = affine_map<(d0) -> (d0)>
+
+// CHECK-LABEL: func @test_ReuseCopyTargetAsSource
+func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>){
+ %to = memref.alloc() : memref<2xf32>
+ %temp = memref.alloc() : memref<2xf32>
+ linalg.generic {
+ indexing_maps = [#map0, #map0],
+ iterator_types = ["parallel"]}
+ ins(%arg0 : memref<2xf32>)
+ outs(%temp : memref<2xf32>) {
+ ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
+ %tmp1 = math.exp %gen1_arg0 : f32
+ linalg.yield %tmp1 : f32
+ }
+ linalg.generic {
+ indexing_maps = [#map0, #map0],
+ iterator_types = ["parallel"]}
+ ins(%arg0 : memref<2xf32>)
+ outs(%to : memref<2xf32>) {
+ ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
+ %tmp2 = math.exp %gen2_arg0 : f32
+ linalg.yield %tmp2 : f32
+ }
+ // CHECK: linalg.copy
+ linalg.copy(%temp, %to) : memref<2xf32>, memref<2xf32>
+ memref.dealloc %temp : memref<2xf32>
+ return
+}
+
+// -----
+
+// The only redundant copy is linalg.copy(%4, %5)
+
+// CHECK-LABEL: func @loop_alloc
+func @loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
+ // CHECK: %{{.*}} = memref.alloc()
+ %0 = memref.alloc() : memref<2xf32>
+ memref.dealloc %0 : memref<2xf32>
+ // CHECK: %{{.*}} = memref.alloc()
+ %1 = memref.alloc() : memref<2xf32>
+ // CHECK: linalg.copy
+ linalg.copy(%arg3, %1) : memref<2xf32>, memref<2xf32>
+ %2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) {
+ %3 = cmpi eq, %arg5, %arg1 : index
+ // CHECK: memref.dealloc
+ memref.dealloc %arg6 : memref<2xf32>
+ // CHECK: %[[PERCENT4:.*]] = memref.alloc()
+ %4 = memref.alloc() : memref<2xf32>
+ // CHECK-NOT: memref.alloc
+ // CHECK-NOT: linalg.copy
+ // CHECK-NOT: memref.dealloc
+ %5 = memref.alloc() : memref<2xf32>
+ linalg.copy(%4, %5) : memref<2xf32>, memref<2xf32>
+ memref.dealloc %4 : memref<2xf32>
+ // CHECK: %[[PERCENT6:.*]] = memref.alloc()
+ %6 = memref.alloc() : memref<2xf32>
+ // CHECK: linalg.copy(%[[PERCENT4]], %[[PERCENT6]])
+ linalg.copy(%5, %6) : memref<2xf32>, memref<2xf32>
+ scf.yield %6 : memref<2xf32>
+ }
+ // CHECK: linalg.copy
+ linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32>
+ memref.dealloc %2 : memref<2xf32>
+ return
+}
+
+// -----
+
+// The linalg.copy operation can be removed in addition to the alloc and
+// dealloc operations. All uses of %0 are then replaced with %arg2.
+
+// CHECK-LABEL: func @check_with_affine_dialect
+func @check_with_affine_dialect(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) {
+ // CHECK-SAME: (%[[ARG0:.*]]: memref<4xf32>, %[[ARG1:.*]]: memref<4xf32>, %[[RES:.*]]: memref<4xf32>)
+ // CHECK-NOT: memref.alloc
+ %0 = memref.alloc() : memref<4xf32>
+ affine.for %arg3 = 0 to 4 {
+ %5 = affine.load %arg0[%arg3] : memref<4xf32>
+ %6 = affine.load %arg1[%arg3] : memref<4xf32>
+ %7 = cmpf ogt, %5, %6 : f32
+ // CHECK: %[[SELECT_RES:.*]] = select
+ %8 = select %7, %5, %6 : f32
+ // CHECK-NEXT: affine.store %[[SELECT_RES]], %[[RES]]
+ affine.store %8, %0[%arg3] : memref<4xf32>
+ }
+ // CHECK-NOT: linalg.copy
+ // CHECK-NOT: dealloc
+ linalg.copy(%0, %arg2) : memref<4xf32>, memref<4xf32>
+ memref.dealloc %0 : memref<4xf32>
+ // CHECK: return
+ return
+}