[Mlir-commits] [mlir] 465b9a4 - Revert "Revert "[mlir] Introduce CloneOp and adapt test cases in BufferDeallocation.""
Alexander Belyaev
llvmlistbot at llvm.org
Wed Mar 31 00:55:21 PDT 2021
Author: Alexander Belyaev
Date: 2021-03-31T09:49:09+02:00
New Revision: 465b9a4a3303727df1584ca52bdced964a34efe9
URL: https://github.com/llvm/llvm-project/commit/465b9a4a3303727df1584ca52bdced964a34efe9
DIFF: https://github.com/llvm/llvm-project/commit/465b9a4a3303727df1584ca52bdced964a34efe9.diff
LOG: Revert "Revert "[mlir] Introduce CloneOp and adapt test cases in BufferDeallocation.""
This reverts commit 883912abe669ef246ada0adc9cf1c9748b742400.
Added:
mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
Modified:
mlir/docs/BufferDeallocationInternals.md
mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
mlir/include/mlir/Transforms/BufferUtils.h
mlir/include/mlir/Transforms/Passes.h
mlir/include/mlir/Transforms/Passes.td
mlir/lib/Dialect/MemRef/CMakeLists.txt
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
mlir/lib/Transforms/BufferDeallocation.cpp
mlir/lib/Transforms/BufferUtils.cpp
mlir/lib/Transforms/CMakeLists.txt
mlir/test/Transforms/buffer-deallocation.mlir
mlir/test/Transforms/canonicalize.mlir
Removed:
mlir/lib/Transforms/CopyRemoval.cpp
mlir/test/Transforms/copy-removal.mlir
################################################################################
diff --git a/mlir/docs/BufferDeallocationInternals.md b/mlir/docs/BufferDeallocationInternals.md
index dee37493512d8..7c731066d31e8 100644
--- a/mlir/docs/BufferDeallocationInternals.md
+++ b/mlir/docs/BufferDeallocationInternals.md
@@ -48,7 +48,7 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>) {
partial_write(%0, %0)
br ^bb3()
^bb3():
- "linalg.copy"(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
+ test.copy(%0, %arg1) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -133,11 +133,11 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
^bb1:
br ^bb3(%arg1 : memref<2xf32>)
^bb2:
- %0 = alloc() : memref<2xf32> // aliases: %1
+ %0 = memref.alloc() : memref<2xf32> // aliases: %1
use(%0)
br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>): // %1 could be %0 or %arg1
- "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -149,7 +149,7 @@ of code:
```mlir
func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
- %0 = alloc() : memref<2xf32> // moved to bb0
+ %0 = memref.alloc() : memref<2xf32> // moved to bb0
cond_br %arg0, ^bb1, ^bb2
^bb1:
br ^bb3(%arg1 : memref<2xf32>)
@@ -157,7 +157,7 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
use(%0)
br ^bb3(%0 : memref<2xf32>)
^bb3(%1: memref<2xf32>):
- "linalg.copy"(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
+ test.copy(%1, %arg2) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -179,17 +179,17 @@ func @condBranchDynamicType(
^bb1:
br ^bb3(%arg1 : memref<?xf32>)
^bb2(%0: index):
- %1 = alloc(%0) : memref<?xf32> // cannot be moved upwards to the data
+ %1 = memref.alloc(%0) : memref<?xf32> // cannot be moved upwards to the data
// dependency to %0
use(%1)
br ^bb3(%1 : memref<?xf32>)
^bb3(%2: memref<?xf32>):
- "linalg.copy"(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ test.copy(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
return
}
```
-## Introduction of Copies
+## Introduction of Clones
In order to guarantee that all allocated buffers are freed properly, we have to
pay attention to the control flow and all potential aliases a buffer allocation
@@ -200,10 +200,10 @@ allocations have already been placed:
```mlir
func @branch(%arg0: i1) {
- %0 = alloc() : memref<2xf32> // aliases: %2
+ %0 = memref.alloc() : memref<2xf32> // aliases: %2
cond_br %arg0, ^bb1, ^bb2
^bb1:
- %1 = alloc() : memref<2xf32> // resides here for demonstration purposes
+ %1 = memref.alloc() : memref<2xf32> // resides here for demonstration purposes
// aliases: %2
br ^bb3(%1 : memref<2xf32>)
^bb2:
@@ -232,88 +232,31 @@ result:
```mlir
func @branch(%arg0: i1) {
- %0 = alloc() : memref<2xf32>
+ %0 = memref.alloc() : memref<2xf32>
cond_br %arg0, ^bb1, ^bb2
^bb1:
- %1 = alloc() : memref<2xf32>
- %3 = alloc() : memref<2xf32> // temp copy for %1
- "linalg.copy"(%1, %3) : (memref<2xf32>, memref<2xf32>) -> ()
- dealloc %1 : memref<2xf32> // %1 can be safely freed here
+ %1 = memref.alloc() : memref<2xf32>
+    %3 = memref.clone %1 : memref<2xf32> to memref<2xf32>
+ memref.dealloc %1 : memref<2xf32> // %1 can be safely freed here
br ^bb3(%3 : memref<2xf32>)
^bb2:
use(%0)
- %4 = alloc() : memref<2xf32> // temp copy for %0
- "linalg.copy"(%0, %4) : (memref<2xf32>, memref<2xf32>) -> ()
+    %4 = memref.clone %0 : memref<2xf32> to memref<2xf32>
br ^bb3(%4 : memref<2xf32>)
^bb3(%2: memref<2xf32>):
…
- dealloc %2 : memref<2xf32> // free temp buffer %2
- dealloc %0 : memref<2xf32> // %0 can be safely freed here
+ memref.dealloc %2 : memref<2xf32> // free temp buffer %2
+ memref.dealloc %0 : memref<2xf32> // %0 can be safely freed here
return
}
```
Note that a temporary buffer for %2 was introduced to free all allocations
properly. Note further that the unnecessary allocation of %3 can be easily
-removed using one of the post-pass transformations.
-
-Reconsider the previously introduced sample demonstrating dynamically shaped
-types:
-
-```mlir
-func @condBranchDynamicType(
- %arg0: i1,
- %arg1: memref<?xf32>,
- %arg2: memref<?xf32>,
- %arg3: index) {
- cond_br %arg0, ^bb1, ^bb2(%arg3: index)
-^bb1:
- br ^bb3(%arg1 : memref<?xf32>)
-^bb2(%0: index):
- %1 = alloc(%0) : memref<?xf32> // aliases: %2
- use(%1)
- br ^bb3(%1 : memref<?xf32>)
-^bb3(%2: memref<?xf32>):
- "linalg.copy"(%2, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
- return
-}
-```
+removed using one of the post-pass transformations or the canonicalization
+pass.
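A minimal sketch of this cleanup on the example above: the clone of %1 and the
dealloc of %1 reside in the same block, so the canonicalizer can replace the
clone with its source.

```mlir
// Before canonicalization (^bb1 of the example above):
%1 = memref.alloc() : memref<2xf32>
%3 = memref.clone %1 : memref<2xf32> to memref<2xf32>
memref.dealloc %1 : memref<2xf32>
br ^bb3(%3 : memref<2xf32>)

// After canonicalization: %3 is replaced by %1 and the dealloc is erased.
%1 = memref.alloc() : memref<2xf32>
br ^bb3(%1 : memref<2xf32>)
```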
-In the presence of DSTs, we have to parameterize the allocations with
-additional dimension information of the source buffers, we want to copy from.
-BufferDeallocation automatically introduces all required operations to extract
-dimension specifications and wires them with the associated allocations:
-
-```mlir
-func @condBranchDynamicType(
- %arg0: i1,
- %arg1: memref<?xf32>,
- %arg2: memref<?xf32>,
- %arg3: index) {
- cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
-^bb1:
- %c0 = constant 0 : index
- %0 = dim %arg1, %c0 : memref<?xf32> // dimension operation to parameterize
- // the following temp allocation
- %1 = alloc(%0) : memref<?xf32>
- "linalg.copy"(%arg1, %1) : (memref<?xf32>, memref<?xf32>) -> ()
- br ^bb3(%1 : memref<?xf32>)
-^bb2(%2: index):
- %3 = alloc(%2) : memref<?xf32>
- use(%3)
- %c0_0 = constant 0 : index
- %4 = dim %3, %c0_0 : memref<?xf32> // dimension operation to parameterize
- // the following temp allocation
- %5 = alloc(%4) : memref<?xf32>
- "linalg.copy"(%3, %5) : (memref<?xf32>, memref<?xf32>) -> ()
- dealloc %3 : memref<?xf32> // %3 can be safely freed here
- br ^bb3(%5 : memref<?xf32>)
-^bb3(%6: memref<?xf32>):
- "linalg.copy"(%6, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
- dealloc %6 : memref<?xf32> // %6 can be safely freed here
- return
-}
-```
+The presented example also works with dynamically shaped types.
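Since the clone operation allocates its result buffer itself, no dim
operations are needed to parameterize a temporary allocation. A minimal
sketch for a dynamically shaped buffer:

```mlir
%1 = memref.clone %arg1 : memref<?xf32> to memref<?xf32>
```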
BufferDeallocation performs a fix-point iteration taking all aliases of all
tracked allocations into account. We initialize the general iteration process
@@ -335,7 +278,7 @@ func @condBranchDynamicTypeNested(
^bb1:
br ^bb6(%arg1 : memref<?xf32>)
^bb2(%0: index):
- %1 = alloc(%0) : memref<?xf32> // cannot be moved upwards due to the data
+ %1 = memref.alloc(%0) : memref<?xf32> // cannot be moved upwards due to the data
// dependency to %0
// aliases: %2, %3, %4
use(%1)
@@ -349,7 +292,7 @@ func @condBranchDynamicTypeNested(
^bb6(%3: memref<?xf32>): // crit. alias of %arg1 and %2 (in other words %1)
br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>): // non-crit. alias of %3, since %3 dominates %4
- "linalg.copy"(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
return
}
```
@@ -366,13 +309,11 @@ func @condBranchDynamicTypeNested(
%arg3: index) {
cond_br %arg0, ^bb1, ^bb2(%arg3 : index)
^bb1:
- %c0 = constant 0 : index
- %d0 = dim %arg1, %c0 : memref<?xf32>
- %5 = alloc(%d0) : memref<?xf32> // temp buffer required due to alias %3
- "linalg.copy"(%arg1, %5) : (memref<?xf32>, memref<?xf32>) -> ()
+ // temp buffer required due to alias %3
+  %5 = memref.clone %arg1 : memref<?xf32> to memref<?xf32>
br ^bb6(%5 : memref<?xf32>)
^bb2(%0: index):
- %1 = alloc(%0) : memref<?xf32>
+ %1 = memref.alloc(%0) : memref<?xf32>
use(%1)
cond_br %arg0, ^bb3, ^bb4
^bb3:
@@ -380,17 +321,14 @@ func @condBranchDynamicTypeNested(
^bb4:
br ^bb5(%1 : memref<?xf32>)
^bb5(%2: memref<?xf32>):
- %c0_0 = constant 0 : index
- %d1 = dim %2, %c0_0 : memref<?xf32>
- %6 = alloc(%d1) : memref<?xf32> // temp buffer required due to alias %3
- "linalg.copy"(%1, %6) : (memref<?xf32>, memref<?xf32>) -> ()
- dealloc %1 : memref<?xf32>
+  %6 = memref.clone %1 : memref<?xf32> to memref<?xf32>
+ memref.dealloc %1 : memref<?xf32>
br ^bb6(%6 : memref<?xf32>)
^bb6(%3: memref<?xf32>):
br ^bb7(%3 : memref<?xf32>)
^bb7(%4: memref<?xf32>):
- "linalg.copy"(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
- dealloc %3 : memref<?xf32> // free %3, since %4 is a non-crit. alias of %3
+ test.copy(%4, %arg2) : (memref<?xf32>, memref<?xf32>) -> ()
+ memref.dealloc %3 : memref<?xf32> // free %3, since %4 is a non-crit. alias of %3
return
}
```
@@ -399,7 +337,7 @@ Since %3 is a critical alias, BufferDeallocation introduces an additional
temporary copy in all predecessor blocks. %3 has an additional (non-critical)
alias %4 that extends the live range until the end of bb7. Therefore, we can
free %3 after its last use, while taking all aliases into account. Note that %4
- does not need to be freed, since we did not introduce a copy for it.
+does not need to be freed, since we did not introduce a clone for it.
The actual introduction of buffer copies is done after the fix-point iteration
has been terminated and all critical aliases have been detected. A critical
@@ -445,7 +383,7 @@ infer the high-level control flow:
func @inner_region_control_flow(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
- %0 = alloc(%arg0, %arg0) : memref<?x?xf32>
+ %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
then(%arg2 : memref<?x?xf32>) { // aliases: %arg4, %1
custom.region_if_yield %arg2 : memref<?x?xf32>
@@ -468,11 +406,11 @@ operation to determine the value of %2 at runtime which creates an alias:
```mlir
func @nested_region_control_flow(%arg0 : index, %arg1 : index) -> memref<?x?xf32> {
%0 = cmpi "eq", %arg0, %arg1 : index
- %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
+ %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32> // %2 will be an alias of %1
} else {
- %3 = alloc(%arg0, %arg1) : memref<?x?xf32> // nested allocation in a div.
+ %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32> // nested allocation in a div.
// branch
use(%3)
scf.yield %1 : memref<?x?xf32> // %2 will be an alias of %1
@@ -489,13 +427,13 @@ alias of %1 which does not need to be tracked.
```mlir
func @nested_region_control_flow(%arg0: index, %arg1: index) -> memref<?x?xf32> {
%0 = cmpi "eq", %arg0, %arg1 : index
- %1 = alloc(%arg0, %arg0) : memref<?x?xf32>
+ %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%2 = scf.if %0 -> (memref<?x?xf32>) {
scf.yield %1 : memref<?x?xf32>
} else {
- %3 = alloc(%arg0, %arg1) : memref<?x?xf32>
+ %3 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
use(%3)
- dealloc %3 : memref<?x?xf32> // %3 can be safely freed here
+ memref.dealloc %3 : memref<?x?xf32> // %3 can be safely freed here
scf.yield %1 : memref<?x?xf32>
}
return %2 : memref<?x?xf32>
@@ -514,12 +452,12 @@ above that uses a nested allocation:
func @inner_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
- %0 = alloc(%arg0, %arg0) : memref<?x?xf32>
+ %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
then(%arg2 : memref<?x?xf32>) { // aliases: %arg4, %1
custom.region_if_yield %arg2 : memref<?x?xf32>
} else(%arg3 : memref<?x?xf32>) {
- %2 = alloc(%arg0, %arg1) : memref<?x?xf32> // aliases: %arg4, %1
+ %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32> // aliases: %arg4, %1
custom.region_if_yield %2 : memref<?x?xf32>
} join(%arg4 : memref<?x?xf32>) { // aliases: %1
custom.region_if_yield %arg4 : memref<?x?xf32>
@@ -537,40 +475,22 @@ This causes BufferDeallocation to introduce additional copies:
func @inner_region_control_flow_div(
%arg0 : index,
%arg1 : index) -> memref<?x?xf32> {
- %0 = alloc(%arg0, %arg0) : memref<?x?xf32>
+ %0 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
%1 = custom.region_if %0 : memref<?x?xf32> -> (memref<?x?xf32>)
then(%arg2 : memref<?x?xf32>) {
- %c0 = constant 0 : index // determine dimension extents for temp allocation
- %2 = dim %arg2, %c0 : memref<?x?xf32>
- %c1 = constant 1 : index
- %3 = dim %arg2, %c1 : memref<?x?xf32>
- %4 = alloc(%2, %3) : memref<?x?xf32> // temp buffer required due to critic.
- // alias %arg4
- linalg.copy(%arg2, %4) : memref<?x?xf32>, memref<?x?xf32>
+    %4 = memref.clone %arg2 : memref<?x?xf32> to memref<?x?xf32>
custom.region_if_yield %4 : memref<?x?xf32>
} else(%arg3 : memref<?x?xf32>) {
- %2 = alloc(%arg0, %arg1) : memref<?x?xf32>
- %c0 = constant 0 : index // determine dimension extents for temp allocation
- %3 = dim %2, %c0 : memref<?x?xf32>
- %c1 = constant 1 : index
- %4 = dim %2, %c1 : memref<?x?xf32>
- %5 = alloc(%3, %4) : memref<?x?xf32> // temp buffer required due to critic.
- // alias %arg4
- linalg.copy(%2, %5) : memref<?x?xf32>, memref<?x?xf32>
- dealloc %2 : memref<?x?xf32>
+ %2 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+    %5 = memref.clone %2 : memref<?x?xf32> to memref<?x?xf32>
+ memref.dealloc %2 : memref<?x?xf32>
custom.region_if_yield %5 : memref<?x?xf32>
} join(%arg4: memref<?x?xf32>) {
- %c0 = constant 0 : index // determine dimension extents for temp allocation
- %2 = dim %arg4, %c0 : memref<?x?xf32>
- %c1 = constant 1 : index
- %3 = dim %arg4, %c1 : memref<?x?xf32>
- %4 = alloc(%2, %3) : memref<?x?xf32> // this allocation will be removed by
- // applying the copy removal pass
- linalg.copy(%arg4, %4) : memref<?x?xf32>, memref<?x?xf32>
- dealloc %arg4 : memref<?x?xf32>
+    %4 = memref.clone %arg4 : memref<?x?xf32> to memref<?x?xf32>
+ memref.dealloc %arg4 : memref<?x?xf32>
custom.region_if_yield %4 : memref<?x?xf32>
}
- dealloc %0 : memref<?x?xf32> // %0 can be safely freed here
+ memref.dealloc %0 : memref<?x?xf32> // %0 can be safely freed here
return %1 : memref<?x?xf32>
}
```
@@ -600,7 +520,7 @@ func @loop_nested_if(
iter_args(%iterBuf = %buf) -> memref<2xf32> {
%1 = cmpi "eq", %i, %ub : index
%2 = scf.if %1 -> (memref<2xf32>) {
- %3 = alloc() : memref<2xf32> // makes %2 a critical alias due to a
+ %3 = memref.alloc() : memref<2xf32> // makes %2 a critical alias due to a
// divergent allocation
use(%3)
scf.yield %3 : memref<2xf32>
@@ -609,7 +529,7 @@ func @loop_nested_if(
}
scf.yield %2 : memref<2xf32>
}
- "linalg.copy"(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+ test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
return
}
```
@@ -634,31 +554,27 @@ func @loop_nested_if(
%step: index,
%buf: memref<2xf32>,
%res: memref<2xf32>) {
- %4 = alloc() : memref<2xf32>
- "linalg.copy"(%buf, %4) : (memref<2xf32>, memref<2xf32>) -> ()
+  %4 = memref.clone %buf : memref<2xf32> to memref<2xf32>
%0 = scf.for %i = %lb to %ub step %step
iter_args(%iterBuf = %4) -> memref<2xf32> {
%1 = cmpi "eq", %i, %ub : index
%2 = scf.if %1 -> (memref<2xf32>) {
- %3 = alloc() : memref<2xf32> // makes %2 a critical alias
+ %3 = memref.alloc() : memref<2xf32> // makes %2 a critical alias
use(%3)
- %5 = alloc() : memref<2xf32> // temp copy due to crit. alias %2
- "linalg.copy"(%3, %5) : memref<2xf32>, memref<2xf32>
- dealloc %3 : memref<2xf32>
+      %5 = memref.clone %3 : memref<2xf32> to memref<2xf32>
+ memref.dealloc %3 : memref<2xf32>
scf.yield %5 : memref<2xf32>
} else {
- %6 = alloc() : memref<2xf32> // temp copy due to crit. alias %2
- "linalg.copy"(%iterBuf, %6) : memref<2xf32>, memref<2xf32>
+      %6 = memref.clone %iterBuf : memref<2xf32> to memref<2xf32>
scf.yield %6 : memref<2xf32>
}
- %7 = alloc() : memref<2xf32> // temp copy due to crit. alias %iterBuf
- "linalg.copy"(%2, %7) : memref<2xf32>, memref<2xf32>
- dealloc %2 : memref<2xf32>
- dealloc %iterBuf : memref<2xf32> // free backedge iteration variable
+    %7 = memref.clone %2 : memref<2xf32> to memref<2xf32>
+ memref.dealloc %2 : memref<2xf32>
+ memref.dealloc %iterBuf : memref<2xf32> // free backedge iteration variable
scf.yield %7 : memref<2xf32>
}
- "linalg.copy"(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
- dealloc %0 : memref<2xf32> // free temp copy %0
+ test.copy(%0, %res) : (memref<2xf32>, memref<2xf32>) -> ()
+ memref.dealloc %0 : memref<2xf32> // free temp copy %0
return
}
```
@@ -684,46 +600,37 @@ deallocations.
In order to limit the complexity of the BufferDeallocation transformation, some
tiny code-polishing/optimization transformations are not applied on-the-fly
-during placement. Currently, there is only the CopyRemoval transformation to
-remove unnecessary copy and allocation operations.
+during placement. Currently, a canonicalization pattern is added to the clone
+operation to remove unnecessary clones.
Note: further transformations might be added to the post-pass phase in the
future.
-## CopyRemoval Pass
-
-A common pattern that arises during placement is the introduction of
-unnecessary temporary copies that are used instead of the original source
-buffer. For this reason, there is a post-pass transformation that removes these
-allocations and copies via `-copy-removal`. This pass, besides removing
-unnecessary copy operations, will also remove the dead allocations and their
-corresponding deallocation operations. The CopyRemoval pass can currently be
-applied to operations that implement the `CopyOpInterface` in any of these two
-situations which are
+## Clone Canonicalization
-* reusing the source buffer of the copy operation.
-* reusing the target buffer of the copy operation.
+During the placement of clones it may happen that unnecessary clones are
+inserted. If these clones appear with their corresponding dealloc operation
+within the same block, we can use the canonicalizer to remove these
+unnecessary operations. Note that this step needs to take place after the
+insertion of clones and deallocs in the buffer deallocation step. The
+canonicalization covers both the newly created target value of the clone
+operation and its source operation.
-## Reusing the Source Buffer of the Copy Operation
+## Canonicalization of the Source Buffer of the Clone Operation
-In this case, the source of the copy operation can be used instead of target.
-The unused allocation and deallocation operations that are defined for this
-copy operation are also removed. Here is a working example generated by the
-BufferDeallocation pass that allocates a buffer with dynamic size. A deeper
+In this case, the source of the clone operation can be used instead of its
+target. The unused allocation and deallocation operations that are defined for
+this clone operation are also removed. Here is a working example generated by
+the BufferDeallocation pass that allocates a buffer with dynamic size. A deeper
analysis of this sample reveals that the highlighted operations are redundant
and can be removed.
```mlir
func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
- %7 = alloc(%arg0, %arg1) : memref<?x?xf32>
- %c0_0 = constant 0 : index
- %8 = dim %7, %c0_0 : memref<?x?xf32>
- %c1_1 = constant 1 : index
- %9 = dim %7, %c1_1 : memref<?x?xf32>
- %10 = alloc(%8, %9) : memref<?x?xf32>
- linalg.copy(%7, %10) : memref<?x?xf32>, memref<?x?xf32>
- dealloc %7 : memref<?x?xf32>
- return %10 : memref<?x?xf32>
+ %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+  %2 = memref.clone %1 : memref<?x?xf32> to memref<?x?xf32>
+ memref.dealloc %1 : memref<?x?xf32>
+ return %2 : memref<?x?xf32>
}
```
@@ -731,53 +638,39 @@ Will be transformed to:
```mlir
func @dynamic_allocation(%arg0: index, %arg1: index) -> memref<?x?xf32> {
- %7 = alloc(%arg0, %arg1) : memref<?x?xf32>
- %c0_0 = constant 0 : index
- %8 = dim %7, %c0_0 : memref<?x?xf32>
- %c1_1 = constant 1 : index
- %9 = dim %7, %c1_1 : memref<?x?xf32>
- return %7 : memref<?x?xf32>
+ %1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+ return %1 : memref<?x?xf32>
}
```
-In this case, the additional copy %10 can be replaced with its original source
-buffer %7. This also applies to the associated dealloc operation of %7.
+In this case, the additional clone %2 can be replaced with its original source
+buffer %1. This also applies to the associated dealloc operation of %1.
-To limit the complexity of this transformation, it only removes copy operations
-when the following constraints are met:
+## Canonicalization of the Target Buffer of the Clone Operation
-* The copy operation, the defining operation for the target value, and the
-deallocation of the source value lie in the same block.
-* There are no users/aliases of the target value between the defining operation
-of the target value and its copy operation.
-* There are no users/aliases of the source value between its associated copy
-operation and the deallocation of the source value.
+In this case, the target buffer of the clone operation can be used instead of
+its source. The unused deallocation operation that is defined for this clone
+operation is also removed.
-## Reusing the Target Buffer of the Copy Operation
-
-In this case, the target buffer of the copy operation can be used instead of
-its source. The unused allocation and deallocation operations that are defined
-for this copy operation are also removed.
-
-Consider the following example where a generic linalg operation writes the
-result to %temp and then copies %temp to %result. However, these two operations
-can be merged into a single step. Copy removal removes the copy operation and
-%temp, and replaces the uses of %temp with %result:
+Consider the following example where a generic test operation writes the result
+to %temp and then copies %temp to %result. However, these two operations
+can be merged into a single step. Canonicalization removes the clone operation
+and %temp, and replaces the uses of %temp with %result:
```mlir
func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
- %temp = alloc() : memref<2xf32>
- linalg.generic {
+ %temp = memref.alloc() : memref<2xf32>
+ test.generic {
args_in = 1 : i64,
args_out = 1 : i64,
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]} %arg0, %temp {
^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
%tmp2 = exp %gen2_arg0 : f32
- linalg.yield %tmp2 : f32
+ test.yield %tmp2 : f32
}: memref<2xf32>, memref<2xf32>
- "linalg.copy"(%temp, %result) : (memref<2xf32>, memref<2xf32>) -> ()
- dealloc %temp : memref<2xf32>
+  %result = memref.clone %temp : memref<2xf32> to memref<2xf32>
+ memref.dealloc %temp : memref<2xf32>
return
}
```
@@ -786,33 +679,24 @@ Will be transformed to:
```mlir
func @reuseTarget(%arg0: memref<2xf32>, %result: memref<2xf32>){
- linalg.generic {
+ test.generic {
args_in = 1 : i64,
args_out = 1 : i64,
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]} %arg0, %result {
^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
%tmp2 = exp %gen2_arg0 : f32
- linalg.yield %tmp2 : f32
+ test.yield %tmp2 : f32
}: memref<2xf32>, memref<2xf32>
return
}
```
-Like before, several constraints to use the transformation apply:
-
-* The copy operation, the defining operation of the source value, and the
-deallocation of the source value lie in the same block.
-* There are no users/aliases of the target value between the defining operation
-of the source value and the copy operation.
-* There are no users/aliases of the source value between the copy operation and
-the deallocation of the source value.
-
## Known Limitations
-BufferDeallocation introduces additional copies using allocations from the
-“memref” dialect (“memref.alloc”). Analogous, all deallocations use the
-“memref” dialect-free operation “memref.dealloc”. The actual copy process is
-realized using “linalg.copy”. Furthermore, buffers are essentially immutable
-after their creation in a block. Another limitations are known in the case
-using unstructered control flow.
+BufferDeallocation introduces additional clones from the “memref” dialect
+(“memref.clone”). Analogously, all deallocations use the “memref” dialect
+operation “memref.dealloc”. The actual copy process is realized using
+“test.copy”. Furthermore, buffers are essentially immutable after their
+creation in a block. Further limitations are known in the case of
+unstructured control flow.
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
index 9c2b912c0df15..0542423977835 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
@@ -12,6 +12,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/CastInterfaces.h"
+#include "mlir/Interfaces/CopyOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index b3f5257df782a..fe0fd7d0ff363 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -12,6 +12,7 @@
include "mlir/Dialect/MemRef/IR/MemRefBase.td"
include "mlir/IR/OpBase.td"
include "mlir/Interfaces/CastInterfaces.td"
+include "mlir/Interfaces/CopyOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ViewLikeInterface.td"
include "mlir/IR/SymbolInterfaces.td"
@@ -214,6 +215,9 @@ def MemRef_BufferCastOp : MemRef_Op<"buffer_cast",
// Result type is tensor<4x?xf32>
%12 = memref.buffer_cast %10 : memref<4x?xf32, #map0, 42>
```
+
+    Note that mutating the result of the buffer cast operation leads to
+ undefined behavior.
}];
let arguments = (ins AnyTensor:$tensor);
@@ -312,6 +316,46 @@ def MemRef_CastOp : MemRef_Op<"cast", [
let hasFolder = 1;
}
+//===----------------------------------------------------------------------===//
+// CloneOp
+//===----------------------------------------------------------------------===//
+
+def CloneOp : MemRef_Op<"clone", [
+ CopyOpInterface,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ ]> {
+ let builders = [
+ OpBuilder<(ins "Value":$value), [{
+ return build($_builder, $_state, value.getType(), value);
+ }]>];
+
+ let description = [{
+ Clones the data in the input view into an implicitly defined output view.
+
+ Usage:
+
+ ```mlir
+ %arg1 = memref.clone %arg0 : memref<?xf32> to memref<?xf32>
+ ```
+
+    Note that mutating the source or result of the clone operation leads to
+ undefined behavior.
+ }];
+
+ let arguments = (ins Arg<AnyMemRef, "", []>:$input);
+ let results = (outs Arg<AnyMemRef, "", []>:$output);
+
+ let extraClassDeclaration = [{
+    Value getSource() { return input(); }
+ Value getTarget() { return output(); }
+ }];
+
+ let assemblyFormat = "$input attr-dict `:` type($input) `to` type($output)";
+
+ let hasFolder = 1;
+ let hasCanonicalizer = 1;
+}
+
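Since the definition sets `hasFolder` and the implementation below routes the
fold through `foldMemRefCast`, a producing `memref.cast` can be absorbed into
the clone. A minimal sketch with hypothetical values:

```mlir
// Before folding:
%0 = memref.cast %arg : memref<4xf32> to memref<?xf32>
%1 = memref.clone %0 : memref<?xf32> to memref<?xf32>

// After folding, the cast's source feeds the clone directly:
%1 = memref.clone %arg : memref<4xf32> to memref<?xf32>
```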
//===----------------------------------------------------------------------===//
// DeallocOp
//===----------------------------------------------------------------------===//
@@ -1090,6 +1134,9 @@ def TensorLoadOp : MemRef_Op<"tensor_load",
// Produces a value of tensor<4x?xf32> type.
%12 = memref.tensor_load %10 : memref<4x?xf32, #layout, memspace0>
```
+
+ If tensor load is used in the bufferization steps, mutating the source
+ buffer after loading leads to undefined behavior.
}];
let arguments = (ins Arg<AnyRankedOrUnrankedMemRef,
diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
new file mode 100644
index 0000000000000..024fe5ebfbc35
--- /dev/null
+++ b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
@@ -0,0 +1,29 @@
+//===- MemRefUtils.h - MemRef transformation utilities ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes for various transformation utilities for
+// the MemRefOps dialect. These are not passes by themselves but are used
+// either by passes, optimization sequences, or in turn by other transformation
+// utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_MEMREF_UTILS_MEMREFUTILS_H
+#define MLIR_DIALECT_MEMREF_UTILS_MEMREFUTILS_H
+
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+
+namespace mlir {
+
+/// Finds the associated dealloc that can be linked to our allocation nodes (if
+/// any).
+Operation *findDealloc(Value allocValue);
+
+} // end namespace mlir
+
+#endif // MLIR_DIALECT_MEMREF_UTILS_MEMREFUTILS_H
diff --git a/mlir/include/mlir/Transforms/BufferUtils.h b/mlir/include/mlir/Transforms/BufferUtils.h
index 33edffa372a37..e432fb8f53f55 100644
--- a/mlir/include/mlir/Transforms/BufferUtils.h
+++ b/mlir/include/mlir/Transforms/BufferUtils.h
@@ -39,10 +39,6 @@ class BufferPlacementAllocs {
static Operation *getStartOperation(Value allocValue, Block *placementBlock,
const Liveness &liveness);
- /// Find an associated dealloc operation that is linked to the given
- /// allocation node (if any).
- static Operation *findDealloc(Value allocValue);
-
public:
/// Initializes the internal list by discovering all supported allocation
/// nodes.
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 60ea4b188ae16..1d4234b38efca 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -63,9 +63,6 @@ std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
/// Creates an instance of the Canonicalizer pass.
std::unique_ptr<Pass> createCanonicalizerPass();
-/// Create a pass that removes unnecessary Copy operations.
-std::unique_ptr<Pass> createCopyRemovalPass();
-
/// Creates a pass to perform common sub expression elimination.
std::unique_ptr<Pass> createCSEPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 2305c4a391912..0e14dcb873e73 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -282,8 +282,6 @@ def BufferDeallocation : FunctionPass<"buffer-deallocation"> {
}];
let constructor = "mlir::createBufferDeallocationPass()";
- // TODO: this pass likely shouldn't depend on Linalg?
- let dependentDialects = ["linalg::LinalgDialect"];
}
def BufferHoisting : FunctionPass<"buffer-hoisting"> {
@@ -366,11 +364,6 @@ def Canonicalizer : Pass<"canonicalize"> {
let dependentDialects = ["memref::MemRefDialect"];
}
-def CopyRemoval : FunctionPass<"copy-removal"> {
- let summary = "Remove the redundant copies from input IR";
- let constructor = "mlir::createCopyRemovalPass()";
-}
-
def CSE : Pass<"cse"> {
let summary = "Eliminate common sub-expressions";
let description = [{
diff --git a/mlir/lib/Dialect/MemRef/CMakeLists.txt b/mlir/lib/Dialect/MemRef/CMakeLists.txt
index f33061b2d87cf..dc79a5087f8ec 100644
--- a/mlir/lib/Dialect/MemRef/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/CMakeLists.txt
@@ -1 +1,23 @@
-add_subdirectory(IR)
+add_mlir_dialect_library(MLIRMemRef
+ IR/MemRefDialect.cpp
+ IR/MemRefOps.cpp
+ Utils/MemRefUtils.cpp
+
+ ADDITIONAL_HEADER_DIRS
+  ${PROJECT_SOURCE_DIR}/include/mlir/Dialect/MemRef
+
+ DEPENDS
+ MLIRStandardOpsIncGen
+ MLIRMemRefOpsIncGen
+
+ LINK_COMPONENTS
+ Core
+
+ LINK_LIBS PUBLIC
+ MLIRDialect
+ MLIRInferTypeOpInterface
+ MLIRIR
+ MLIRStandard
+ MLIRTensor
+ MLIRViewLikeInterface
+)
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index f71ba8f39b61a..ffb5bcf59b7dc 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -463,6 +464,76 @@ OpFoldResult CastOp::fold(ArrayRef<Attribute> operands) {
return succeeded(foldMemRefCast(*this)) ? getResult() : Value();
}
+//===----------------------------------------------------------------------===//
+// CloneOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(CloneOp op) { return success(); }
+
+void CloneOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ effects.emplace_back(MemoryEffects::Read::get(), input(),
+ SideEffects::DefaultResource::get());
+ effects.emplace_back(MemoryEffects::Write::get(), output(),
+ SideEffects::DefaultResource::get());
+}
+
+namespace {
+/// Removes clone operations without any uses, and folds a clone together with
+/// its corresponding dealloc when both the allocation and the deallocation
+/// reside in the same block, replacing the clone with its source value.
+struct SimplifyClones : public OpRewritePattern<CloneOp> {
+ using OpRewritePattern<CloneOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(CloneOp cloneOp,
+ PatternRewriter &rewriter) const override {
+ if (cloneOp.use_empty()) {
+ rewriter.eraseOp(cloneOp);
+ return success();
+ }
+
+ Value source = cloneOp.input();
+
+ // Removes the clone operation and the corresponding dealloc and alloc
+ // operation (if any).
+ auto tryRemoveClone = [&](Operation *sourceOp, Operation *dealloc,
+ Operation *alloc) {
+ if (!sourceOp || !dealloc || !alloc ||
+ alloc->getBlock() != dealloc->getBlock())
+ return false;
+ rewriter.replaceOp(cloneOp, source);
+ rewriter.eraseOp(dealloc);
+ return true;
+ };
+
+ // Removes unnecessary clones that are derived from the result of the clone
+ // op.
+ Operation *deallocOp = findDealloc(cloneOp.output());
+ Operation *sourceOp = source.getDefiningOp();
+ if (tryRemoveClone(sourceOp, deallocOp, sourceOp))
+ return success();
+
+ // Removes unnecessary clones that are derived from the source of the clone
+ // op.
+ deallocOp = findDealloc(source);
+ if (tryRemoveClone(sourceOp, deallocOp, cloneOp))
+ return success();
+
+ return failure();
+ }
+};
+
+} // end anonymous namespace.
+
+void CloneOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+ MLIRContext *context) {
+ results.insert<SimplifyClones>(context);
+}
+
+OpFoldResult CloneOp::fold(ArrayRef<Attribute> operands) {
+ return succeeded(foldMemRefCast(*this)) ? getResult() : Value();
+}
+
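Besides erasing clones without uses, SimplifyClones folds a clone whose source
(or result) is deallocated in the same block as the corresponding allocation,
replacing the clone with its source. A minimal sketch mirroring the
documentation example above:

```mlir
// Before: the clone's source %1 is deallocated in the clone's block.
%1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
%2 = memref.clone %1 : memref<?x?xf32> to memref<?x?xf32>
memref.dealloc %1 : memref<?x?xf32>
return %2 : memref<?x?xf32>

// After: %2 is replaced by %1 and the dealloc is erased.
%1 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
return %1 : memref<?x?xf32>
```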
//===----------------------------------------------------------------------===//
// DeallocOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
new file mode 100644
index 0000000000000..26a9a217134e2
--- /dev/null
+++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
@@ -0,0 +1,35 @@
+//===- MemRefUtils.cpp - Utilities to support the MemRef dialect ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for the MemRef dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+
+using namespace mlir;
+
+/// Finds associated deallocs that can be linked to our allocation nodes (if
+/// any).
+Operation *mlir::findDealloc(Value allocValue) {
+ auto userIt = llvm::find_if(allocValue.getUsers(), [&](Operation *user) {
+ auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
+ if (!effectInterface)
+ return false;
+ // Try to find a free effect that is applied to one of our values
+ // that will be automatically freed by our pass.
+ SmallVector<MemoryEffects::EffectInstance, 2> effects;
+ effectInterface.getEffectsOnValue(allocValue, effects);
+ return llvm::any_of(effects, [&](MemoryEffects::EffectInstance &it) {
+ return isa<MemoryEffects::Free>(it.getEffect());
+ });
+ });
+ // Assign the associated dealloc operation (if any).
+ return userIt != allocValue.user_end() ? *userIt : nullptr;
+}
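As a minimal sketch, for IR like the following, `findDealloc(%0)` returns the
`memref.dealloc` user, because that operation declares a `Free` effect on %0:

```mlir
%0 = memref.alloc() : memref<2xf32>
"use"(%0) : (memref<2xf32>) -> ()
memref.dealloc %0 : memref<2xf32>
```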
diff --git a/mlir/lib/Transforms/BufferDeallocation.cpp b/mlir/lib/Transforms/BufferDeallocation.cpp
index aa837cb0e77c5..3ba744d8e6efb 100644
--- a/mlir/lib/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Transforms/BufferDeallocation.cpp
@@ -7,16 +7,15 @@
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
-// Furthermore, buffer placement also adds required new alloc and copy
-// operations to ensure that all buffers are deallocated. The main class is the
+// Furthermore, buffer deallocation also adds required new clone operations to
+// ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is
// significantly important to put them into the correct blocks. However, the
// liveness analysis does not pay attention to aliases, which can occur due to
// branches (and their associated block arguments) in general. For this purpose,
// BufferDeallocation firstly finds all possible aliases for a single value
-// (using the BufferAliasAnalysis class). Consider the following
-// example:
+// (using the BufferAliasAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
// cond_br %cond, ^bb1, ^bb2
@@ -30,16 +29,16 @@
//
// We should place the dealloc for %new_value in exit. However, we have to free
// the buffer in the same block, because it cannot be freed in the post
-// dominator. However, this requires a new copy buffer for %arg1 that will
+// dominator. However, this requires a new clone buffer for %arg1 that will
// contain the actual contents. Using the class BufferAliasAnalysis, we
// will find out that %new_value has a potential alias %arg1. In order to find
// the dealloc position we have to find all potential aliases, iterate over
// their uses and find the common post-dominator block (note that additional
-// copies and buffers remove potential aliases and will influence the placement
+// clones and buffers remove potential aliases and will influence the placement
// of the deallocs). In all cases, the computed block can be safely used to free
// the %new_value buffer (may be exit or bb2) as it will die and we can use
// liveness information to determine the exact operation after which we have to
-// insert the dealloc. However, the algorithm supports introducing copy buffers
+// insert the dealloc. However, the algorithm supports introducing clone buffers
// and placing deallocs in safe locations to ensure that all buffers will be
// freed in the end.
//
@@ -52,10 +51,8 @@
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/StandardOps/Utils/Utils.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
@@ -187,25 +184,25 @@ class Backedges {
/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
-/// introduce copies that in turn leads to additional allocs and de-allocations.
+/// introduce clones that in turn lead to additional deallocations.
class BufferDeallocation : BufferPlacementTransformationBase {
public:
BufferDeallocation(Operation *op)
: BufferPlacementTransformationBase(op), dominators(op),
postDominators(op) {}
- /// Performs the actual placement/creation of all temporary alloc, copy and
- /// dealloc nodes.
+ /// Performs the actual placement/creation of all temporary clone and dealloc
+ /// nodes.
void deallocate() {
- // Add additional allocations and copies that are required.
- introduceCopies();
+ // Add additional clones that are required.
+ introduceClones();
// Place deallocations for all allocation entries.
placeDeallocs();
}
private:
- /// Introduces required allocs and copy operations to avoid memory leaks.
- void introduceCopies() {
+ /// Introduces required clone operations to avoid memory leaks.
+ void introduceClones() {
// Initialize the set of values that require a dedicated memory free
// operation since their operands cannot be safely deallocated in a post
// dominator.
@@ -214,7 +211,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
SmallVector<std::tuple<Value, Block *>, 8> toProcess;
// Check dominance relation for proper dominance properties. If the given
- // value node does not dominate an alias, we will have to create a copy in
+ // value node does not dominate an alias, we will have to create a clone in
// order to free all buffers that can potentially leak into a post
// dominator.
auto findUnsafeValues = [&](Value source, Block *definingBlock) {
@@ -255,7 +252,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
// arguments at the correct locations.
aliases.remove(valuesToFree);
- // Add new allocs and additional copy operations.
+ // Add new allocs and additional clone operations.
for (Value value : valuesToFree) {
if (auto blockArg = value.dyn_cast<BlockArgument>())
introduceBlockArgCopy(blockArg);
@@ -269,7 +266,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
}
}
- /// Introduces temporary allocs in all predecessors and copies the source
+ /// Introduces temporary clones in all predecessors and copies the source
/// values into the newly allocated buffers.
void introduceBlockArgCopy(BlockArgument blockArg) {
// Allocate a buffer for the current block argument in the block of
@@ -285,9 +282,9 @@ class BufferDeallocation : BufferPlacementTransformationBase {
Value sourceValue =
branchInterface.getSuccessorOperands(it.getSuccessorIndex())
.getValue()[blockArg.getArgNumber()];
- // Create a new alloc and copy at the current location of the terminator.
- Value alloc = introduceBufferCopy(sourceValue, terminator);
- // Wire new alloc and successor operand.
+ // Create a new clone at the current location of the terminator.
+ Value clone = introduceCloneBuffers(sourceValue, terminator);
+ // Wire new clone and successor operand.
auto mutableOperands =
branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex());
if (!mutableOperands.hasValue())
@@ -296,7 +293,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
else
mutableOperands.getValue()
.slice(blockArg.getArgNumber(), 1)
- .assign(alloc);
+ .assign(clone);
}
// Check whether the block argument has implicitly defined predecessors via
@@ -310,7 +307,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
!(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
return;
- introduceCopiesForRegionSuccessors(
+ introduceClonesForRegionSuccessors(
regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
[&](RegionSuccessor &successorRegion) {
// Find a predecessor of our argRegion.
@@ -318,7 +315,7 @@ class BufferDeallocation : BufferPlacementTransformationBase {
});
// Check whether the block argument belongs to an entry region of the
- // parent operation. In this case, we have to introduce an additional copy
+ // parent operation. In this case, we have to introduce an additional clone
// for buffer that is passed to the argument.
SmallVector<RegionSuccessor, 2> successorRegions;
regionInterface.getSuccessorRegions(/*index=*/llvm::None, successorRegions);
@@ -329,20 +326,20 @@ class BufferDeallocation : BufferPlacementTransformationBase {
if (it == successorRegions.end())
return;
- // Determine the actual operand to introduce a copy for and rewire the
- // operand to point to the copy instead.
+ // Determine the actual operand to introduce a clone for and rewire the
+ // operand to point to the clone instead.
Value operand =
regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber())
[llvm::find(it->getSuccessorInputs(), blockArg).getIndex()];
- Value copy = introduceBufferCopy(operand, parentOp);
+ Value clone = introduceCloneBuffers(operand, parentOp);
auto op = llvm::find(parentOp->getOperands(), operand);
assert(op != parentOp->getOperands().end() &&
"parentOp does not contain operand");
- parentOp->setOperand(op.getIndex(), copy);
+ parentOp->setOperand(op.getIndex(), clone);
}
- /// Introduces temporary allocs in front of all associated nested-region
+ /// Introduces temporary clones in front of all associated nested-region
/// terminators and copies the source values into the newly allocated buffers.
void introduceValueCopyForRegionResult(Value value) {
// Get the actual result index in the scope of the parent terminator.
@@ -354,20 +351,20 @@ class BufferDeallocation : BufferPlacementTransformationBase {
// its parent operation.
return !successorRegion.getSuccessor();
};
- // Introduce a copy for all region "results" that are returned to the parent
- // operation. This is required since the parent's result value has been
- // considered critical. Therefore, the algorithm assumes that a copy of a
- // previously allocated buffer is returned by the operation (like in the
- // case of a block argument).
- introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(),
+ // Introduce a clone for all region "results" that are returned to the
+ // parent operation. This is required since the parent's result value has
+ // been considered critical. Therefore, the algorithm assumes that a clone
+ // of a previously allocated buffer is returned by the operation (like in
+ // the case of a block argument).
+ introduceClonesForRegionSuccessors(regionInterface, operation->getRegions(),
value, regionPredicate);
}
- /// Introduces buffer copies for all terminators in the given regions. The
+ /// Introduces buffer clones for all terminators in the given regions. The
/// regionPredicate is applied to every successor region in order to restrict
- /// the copies to specific regions.
+ /// the clones to specific regions.
template <typename TPredicate>
- void introduceCopiesForRegionSuccessors(
+ void introduceClonesForRegionSuccessors(
RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
Value argValue, const TPredicate ®ionPredicate) {
for (Region ®ion : regions) {
@@ -393,49 +390,37 @@ class BufferDeallocation : BufferPlacementTransformationBase {
walkReturnOperations(®ion, [&](Operation *terminator) {
// Extract the source value from the current terminator.
Value sourceValue = terminator->getOperand(operandIndex);
- // Create a new alloc at the current location of the terminator.
- Value alloc = introduceBufferCopy(sourceValue, terminator);
- // Wire alloc and terminator operand.
- terminator->setOperand(operandIndex, alloc);
+ // Create a new clone at the current location of the terminator.
+ Value clone = introduceCloneBuffers(sourceValue, terminator);
+ // Wire clone and terminator operand.
+ terminator->setOperand(operandIndex, clone);
});
}
}
- /// Creates a new memory allocation for the given source value and copies
+ /// Creates a new memory allocation for the given source value and clones
/// its content into the newly allocated buffer. The terminator operation is
- /// used to insert the alloc and copy operations at the right places.
- Value introduceBufferCopy(Value sourceValue, Operation *terminator) {
- // Avoid multiple copies of the same source value. This can happen in the
+ /// used to insert the clone operation at the right place.
+ Value introduceCloneBuffers(Value sourceValue, Operation *terminator) {
+ // Avoid multiple clones of the same source value. This can happen in the
// presence of loops when a branch acts as a backedge while also having
// another successor that returns to its parent operation. Note: that
// copying copied buffers can introduce memory leaks since the invariant of
- // BufferPlacement assumes that a buffer will be only copied once into a
- // temporary buffer. Hence, the construction of copy chains introduces
+ // BufferDeallocation assumes that a buffer will be only cloned once into a
+ // temporary buffer. Hence, the construction of clone chains introduces
// additional allocations that are not tracked automatically by the
// algorithm.
- if (copiedValues.contains(sourceValue))
+ if (clonedValues.contains(sourceValue))
return sourceValue;
- // Create a new alloc at the current location of the terminator.
- auto memRefType = sourceValue.getType().cast<MemRefType>();
+ // Create a new clone operation that copies the contents of the old
+ // buffer to the new one.
OpBuilder builder(terminator);
+ auto cloneOp =
+ builder.create<memref::CloneOp>(terminator->getLoc(), sourceValue);
- // Extract information about dynamically shaped types by
- // extracting their dynamic dimensions.
- auto dynamicOperands =
- getDynOperands(terminator->getLoc(), sourceValue, builder);
-
- // TODO: provide a generic interface to create dialect-specific
- // Alloc and CopyOp nodes.
- auto alloc = builder.create<memref::AllocOp>(terminator->getLoc(),
- memRefType, dynamicOperands);
-
- // Create a new copy operation that copies to contents of the old
- // allocation to the new one.
- builder.create<linalg::CopyOp>(terminator->getLoc(), sourceValue, alloc);
-
- // Remember the copy of original source value.
- copiedValues.insert(alloc);
- return alloc;
+ // Remember the clone of original source value.
+ clonedValues.insert(cloneOp);
+ return cloneOp;
}
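In IR terms, the helper now emits a single clone right in front of the
terminator instead of the former alloc/linalg.copy pair. A minimal sketch with
hypothetical values:

```mlir
%clone = memref.clone %source : memref<2xf32> to memref<2xf32>
br ^bb3(%clone : memref<2xf32>)
```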
/// Finds correct dealloc positions according to the algorithm described at
@@ -513,8 +498,8 @@ class BufferDeallocation : BufferPlacementTransformationBase {
/// position.
PostDominanceInfo postDominators;
- /// Stores already copied allocations to avoid additional copies of copies.
- ValueSetT copiedValues;
+ /// Stores already cloned buffers to avoid additional clones of clones.
+ ValueSetT clonedValues;
};
//===----------------------------------------------------------------------===//
@@ -522,8 +507,8 @@ class BufferDeallocation : BufferPlacementTransformationBase {
//===----------------------------------------------------------------------===//
/// The actual buffer deallocation pass that inserts and moves dealloc nodes
-/// into the right positions. Furthermore, it inserts additional allocs and
-/// copies if necessary. It uses the algorithm described at the top of the file.
+/// into the right positions. Furthermore, it inserts additional clones if
+/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
void runOnFunction() override {
@@ -540,7 +525,7 @@ struct BufferDeallocationPass : BufferDeallocationBase<BufferDeallocationPass> {
return signalPassFailure();
}
- // Place all required temporary alloc, copy and dealloc nodes.
+ // Place all required temporary clone and dealloc nodes.
BufferDeallocation deallocation(getFunction());
deallocation.deallocate();
}
diff --git a/mlir/lib/Transforms/BufferUtils.cpp b/mlir/lib/Transforms/BufferUtils.cpp
index ab39f57b3fcc3..0cefd53d2d347 100644
--- a/mlir/lib/Transforms/BufferUtils.cpp
+++ b/mlir/lib/Transforms/BufferUtils.cpp
@@ -12,7 +12,7 @@
#include "mlir/Transforms/BufferUtils.h"
#include "PassDetail.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
@@ -49,25 +49,6 @@ Operation *BufferPlacementAllocs::getStartOperation(Value allocValue,
return startOperation;
}
-/// Finds associated deallocs that can be linked to our allocation nodes (if
-/// any).
-Operation *BufferPlacementAllocs::findDealloc(Value allocValue) {
- auto userIt = llvm::find_if(allocValue.getUsers(), [&](Operation *user) {
- auto effectInterface = dyn_cast<MemoryEffectOpInterface>(user);
- if (!effectInterface)
- return false;
- // Try to find a free effect that is applied to one of our values
- // that will be automatically freed by our pass.
- SmallVector<MemoryEffects::EffectInstance, 2> effects;
- effectInterface.getEffectsOnValue(allocValue, effects);
- return llvm::any_of(effects, [&](MemoryEffects::EffectInstance &it) {
- return isa<MemoryEffects::Free>(it.getEffect());
- });
- });
- // Assign the associated dealloc operation (if any).
- return userIt != allocValue.user_end() ? *userIt : nullptr;
-}
-
/// Initializes the internal list by discovering all supported allocation
/// nodes.
BufferPlacementAllocs::BufferPlacementAllocs(Operation *op) { build(op); }
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 36f9e5b832be9..2b185fcf0b7ee 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -7,7 +7,6 @@ add_mlir_library(MLIRTransforms
BufferUtils.cpp
Bufferize.cpp
Canonicalizer.cpp
- CopyRemoval.cpp
CSE.cpp
Inliner.cpp
LocationSnapshot.cpp
diff --git a/mlir/lib/Transforms/CopyRemoval.cpp b/mlir/lib/Transforms/CopyRemoval.cpp
deleted file mode 100644
index c5a8da6329568..0000000000000
--- a/mlir/lib/Transforms/CopyRemoval.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-//===- CopyRemoval.cpp - Removing the redundant copies --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Interfaces/CopyOpInterface.h"
-#include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/Passes.h"
-
-using namespace mlir;
-using namespace MemoryEffects;
-
-namespace {
-
-//===----------------------------------------------------------------------===//
-// CopyRemovalPass
-//===----------------------------------------------------------------------===//
-
-/// This pass removes the redundant Copy operations. Additionally, it
-/// removes the leftover definition and deallocation operations by erasing the
-/// copy operation.
-class CopyRemovalPass : public PassWrapper<CopyRemovalPass, OperationPass<>> {
-public:
- void runOnOperation() override {
- getOperation()->walk([&](CopyOpInterface copyOp) {
- reuseCopySourceAsTarget(copyOp);
- reuseCopyTargetAsSource(copyOp);
- });
- for (std::pair<Value, Value> &pair : replaceList)
- pair.first.replaceAllUsesWith(pair.second);
- for (Operation *op : eraseList)
- op->erase();
- }
-
-private:
- /// List of operations that need to be removed.
- llvm::SmallPtrSet<Operation *, 4> eraseList;
-
- /// List of values that need to be replaced with their counterparts.
- llvm::SmallDenseSet<std::pair<Value, Value>, 4> replaceList;
-
- /// Returns the allocation operation for `value` in `block` if it exists.
- /// nullptr otherwise.
- Operation *getAllocationOpInBlock(Value value, Block *block) {
- assert(block && "Block cannot be null");
- Operation *op = value.getDefiningOp();
- if (op && op->getBlock() == block) {
- auto effects = dyn_cast<MemoryEffectOpInterface>(op);
- if (effects && effects.hasEffect<Allocate>())
- return op;
- }
- return nullptr;
- }
-
- /// Returns the deallocation operation for `value` in `block` if it exists.
- /// nullptr otherwise.
- Operation *getDeallocationOpInBlock(Value value, Block *block) {
- assert(block && "Block cannot be null");
- auto valueUsers = value.getUsers();
- auto it = llvm::find_if(valueUsers, [&](Operation *op) {
- auto effects = dyn_cast<MemoryEffectOpInterface>(op);
- return effects && op->getBlock() == block && effects.hasEffect<Free>();
- });
- return (it == valueUsers.end() ? nullptr : *it);
- }
-
- /// Returns true if any operation between `start` and `end` (exclusive) has
- /// a memory effect.
- bool hasMemoryEffectOpBetween(Operation *start, Operation *end) {
- assert(start && end && "Start and end operations cannot be null");
- assert(start->getBlock() == end->getBlock() &&
- "Start and end operations should be in the same block.");
- Operation *op = start->getNextNode();
- while (op->isBeforeInBlock(end)) {
- if (isa<MemoryEffectOpInterface>(op))
- return true;
- op = op->getNextNode();
- }
- return false;
- };
-
- /// Returns true if `val` has at least one user between the `start` and
- /// `end` operations.
- bool hasUsersBetween(Value val, Operation *start, Operation *end) {
- assert(start && end && "Start and end operations cannot be null");
- Block *block = start->getBlock();
- assert(block == end->getBlock() &&
- "Start and end operations should be in the same block.");
- return llvm::any_of(val.getUsers(), [&](Operation *op) {
- return op->getBlock() == block && start->isBeforeInBlock(op) &&
- op->isBeforeInBlock(end);
- });
- };
-
- bool areOpsInTheSameBlock(ArrayRef<Operation *> operations) {
- assert(!operations.empty() &&
- "The operations list should contain at least a single operation");
- Block *block = operations.front()->getBlock();
- return llvm::none_of(
- operations, [&](Operation *op) { return block != op->getBlock(); });
- }
-
- /// Input:
- /// func(){
- /// %from = alloc()
- /// write_to(%from)
- /// %to = alloc()
- /// copy(%from,%to)
- /// dealloc(%from)
- /// return %to
- /// }
- ///
- /// Output:
- /// func(){
- /// %from = alloc()
- /// write_to(%from)
- /// return %from
- /// }
- /// Constraints:
- /// 1) %to, copy and dealloc must all be defined and lie in the same block.
- /// 2) This transformation cannot be applied if there is any user/alias of
- /// the `to` value between the defining operation of `to` and the copy
- /// operation.
- /// 3) This transformation cannot be applied if there is any user/alias of
- /// the `from` value between the copy operation and the deallocation of `from`.
- /// TODO: Alias analysis is not available at the moment. Currently, we check
- /// if there are any operations with memory effects between copy and
- /// deallocation operations.
- void reuseCopySourceAsTarget(CopyOpInterface copyOp) {
- if (eraseList.count(copyOp))
- return;
-
- Value from = copyOp.getSource();
- Value to = copyOp.getTarget();
-
- Operation *copy = copyOp.getOperation();
- Block *copyBlock = copy->getBlock();
- Operation *fromDefiningOp = from.getDefiningOp();
- Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock);
- Operation *toDefiningOp = getAllocationOpInBlock(to, copyBlock);
- if (!fromDefiningOp || !fromFreeingOp || !toDefiningOp ||
- !areOpsInTheSameBlock({fromFreeingOp, toDefiningOp, copy}) ||
- hasUsersBetween(to, toDefiningOp, copy) ||
- hasUsersBetween(from, copy, fromFreeingOp) ||
- hasMemoryEffectOpBetween(copy, fromFreeingOp))
- return;
-
- replaceList.insert({to, from});
- eraseList.insert(copy);
- eraseList.insert(toDefiningOp);
- eraseList.insert(fromFreeingOp);
- }
-
- /// Input:
- /// func(){
- /// %to = alloc()
- /// %from = alloc()
- /// write_to(%from)
- /// copy(%from,%to)
- /// dealloc(%from)
- /// return %to
- /// }
- ///
- /// Output:
- /// func(){
- /// %to = alloc()
- /// write_to(%to)
- /// return %to
- /// }
- /// Constraints:
- /// 1) %from, copy and dealloc must all be defined and lie in the same block.
- /// 2) This transformation cannot be applied if there is any user/alias of
- /// the `to` value between the defining operation of `from` and the copy
- /// operation.
- /// 3) This transformation cannot be applied if there is any user/alias of
- /// the `from` value between the copy operation and the deallocation of `from`.
- /// TODO: Alias analysis is not available at the moment. Currently, we check
- /// if there are any operations with memory effects between copy and
- /// deallocation operations.
- void reuseCopyTargetAsSource(CopyOpInterface copyOp) {
- if (eraseList.count(copyOp))
- return;
-
- Value from = copyOp.getSource();
- Value to = copyOp.getTarget();
-
- Operation *copy = copyOp.getOperation();
- Block *copyBlock = copy->getBlock();
- Operation *fromDefiningOp = getAllocationOpInBlock(from, copyBlock);
- Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock);
- if (!fromDefiningOp || !fromFreeingOp ||
- !areOpsInTheSameBlock({fromFreeingOp, fromDefiningOp, copy}) ||
- hasUsersBetween(to, fromDefiningOp, copy) ||
- hasUsersBetween(from, copy, fromFreeingOp) ||
- hasMemoryEffectOpBetween(copy, fromFreeingOp))
- return;
-
- replaceList.insert({from, to});
- eraseList.insert(copy);
- eraseList.insert(fromDefiningOp);
- eraseList.insert(fromFreeingOp);
- }
-};
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-// CopyRemovalPass construction
-//===----------------------------------------------------------------------===//
-
-std::unique_ptr<Pass> mlir::createCopyRemovalPass() {
- return std::make_unique<CopyRemovalPass>();
-}
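For context, here is the core redundancy the deleted pass eliminated, distilled into MLIR from the `@simple_test` case in the removed test file further down (the function and value names here are illustrative):

```mlir
// Before copy removal: %temp receives its contents only through the copy.
func @simple() -> memref<5xf32> {
  %temp = memref.alloc() : memref<5xf32>
  %ret = memref.alloc() : memref<5xf32>
  linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
  memref.dealloc %ret : memref<5xf32>
  return %temp : memref<5xf32>
}

// After copy removal: the copy, the extra alloc, and the dealloc are erased,
// and all uses of %temp are replaced by %ret.
func @simple() -> memref<5xf32> {
  %ret = memref.alloc() : memref<5xf32>
  return %ret : memref<5xf32>
}
```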
diff --git a/mlir/test/Transforms/buffer-deallocation.mlir b/mlir/test/Transforms/buffer-deallocation.mlir
index 25197d14fba77..35f7bbf79c8f5 100644
--- a/mlir/test/Transforms/buffer-deallocation.mlir
+++ b/mlir/test/Transforms/buffer-deallocation.mlir
@@ -30,13 +30,11 @@ func @condBranch(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
}
// CHECK-NEXT: cond_br
-// CHECK: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK: %[[ALLOC0:.*]] = memref.clone
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
-// CHECK: %[[ALLOC1:.*]] = memref.alloc()
+// CHECK: %[[ALLOC1:.*]] = memref.alloc
// CHECK-NEXT: test.buffer_based
-// CHECK: %[[ALLOC2:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3(%[[ALLOC2]]
// CHECK: test.copy
@@ -77,16 +75,12 @@ func @condBranchDynamicType(
}
// CHECK-NEXT: cond_br
-// CHECK: %[[DIM0:.*]] = memref.dim
-// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[DIM0]])
-// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]])
+// CHECK: %[[ALLOC0:.*]] = memref.clone
// CHECK-NEXT: br ^bb3(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
-// CHECK: %[[DIM1:.*]] = memref.dim %[[ALLOC1]]
-// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc(%[[DIM1]])
-// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC2]])
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb3
// CHECK-NEXT: ^bb3(%[[ALLOC3:.*]]:{{.*}})
@@ -142,12 +136,10 @@ func @condBranchDynamicTypeNested(
return
}
-// CHECK-NEXT: cond_br
-// CHECK: ^bb1
-// CHECK: %[[DIM0:.*]] = memref.dim
-// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc(%[[DIM0]])
-// CHECK-NEXT: linalg.copy(%{{.*}}, %[[ALLOC0]])
-// CHECK-NEXT: br ^bb6
+// CHECK-NEXT: cond_br{{.*}}
+// CHECK-NEXT: ^bb1
+// CHECK-NEXT: %[[ALLOC0:.*]] = memref.clone
+// CHECK-NEXT: br ^bb6(%[[ALLOC0]]
// CHECK: ^bb2(%[[IDX:.*]]:{{.*}})
// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc(%[[IDX]])
// CHECK-NEXT: test.buffer_based
@@ -157,9 +149,7 @@ func @condBranchDynamicTypeNested(
// CHECK: ^bb4:
// CHECK-NEXT: br ^bb5(%[[ALLOC1]]{{.*}})
// CHECK-NEXT: ^bb5(%[[ALLOC2:.*]]:{{.*}})
-// CHECK: %[[DIM2:.*]] = memref.dim %[[ALLOC2]]
-// CHECK-NEXT: %[[ALLOC3:.*]] = memref.alloc(%[[DIM2]])
-// CHECK-NEXT: linalg.copy(%[[ALLOC2]], %[[ALLOC3]])
+// CHECK-NEXT: %[[ALLOC3:.*]] = memref.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK-NEXT: br ^bb6(%[[ALLOC3]]{{.*}})
// CHECK-NEXT: ^bb6(%[[ALLOC4:.*]]:{{.*}})
@@ -208,13 +198,11 @@ func @criticalEdge(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
return
}
-// CHECK-NEXT: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: %[[ALLOC0:.*]] = memref.clone
// CHECK-NEXT: cond_br
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK: %[[ALLOC2:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: test.copy
// CHECK-NEXT: memref.dealloc
@@ -419,20 +407,17 @@ func @moving_alloc_and_inserting_missing_dealloc(
return
}
-// CHECK-NEXT: cond_br
-// CHECK: ^bb1
-// CHECK: ^bb1
+// CHECK-NEXT: cond_br{{.*}}
+// CHECK-NEXT: ^bb1
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK: %[[ALLOC1:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %[[ALLOC0]]
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
// CHECK-NEXT: br ^bb3(%[[ALLOC1]]
// CHECK-NEXT: ^bb2
// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc()
// CHECK-NEXT: test.buffer_based
-// CHECK: %[[ALLOC3:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK-NEXT: %[[ALLOC3:.*]] = memref.clone %[[ALLOC2]]
// CHECK-NEXT: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: br ^bb3(%[[ALLOC3]]
// CHECK-NEXT: ^bb3(%[[ALLOC4:.*]]:{{.*}})
@@ -545,8 +530,7 @@ func @nested_regions_and_cond_branch(
}
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
-// CHECK: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ARG1]], %[[ALLOC0]])
+// CHECK: %[[ALLOC0:.*]] = memref.clone %[[ARG1]]
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
@@ -554,12 +538,11 @@ func @nested_regions_and_cond_branch(
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC2]]
// CHECK: memref.dealloc %[[ALLOC2]]
// CHECK-NEXT: %{{.*}} = math.exp
-// CHECK: %[[ALLOC3:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC1]], %[[ALLOC3]])
+// CHECK: %[[ALLOC3:.*]] = memref.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
-// CHECK-NEXT: dealloc
+// CHECK-NEXT: memref.dealloc
// -----
@@ -641,12 +624,10 @@ func @nested_region_control_flow_div(
// CHECK: %[[ALLOC0:.*]] = memref.alloc(%arg0, %arg0)
// CHECK-NEXT: %[[ALLOC1:.*]] = scf.if
-// CHECK: %[[ALLOC2:.*]] = memref.alloc
-// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC2]])
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[ALLOC0]]
// CHECK: scf.yield %[[ALLOC2]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc(%arg0, %arg1)
-// CHECK: %[[ALLOC4:.*]] = memref.alloc
-// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK-NEXT: %[[ALLOC4:.*]] = memref.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: memref.dealloc %[[ALLOC0]]
@@ -823,20 +804,18 @@ func @nestedRegionsAndCondBranchAlloca(
// CHECK: (%[[cond:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %{{.*}}: {{.*}})
// CHECK-NEXT: cond_br %[[cond]], ^[[BB1:.*]], ^[[BB2:.*]]
// CHECK: ^[[BB1]]:
-// CHECK: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK: %[[ALLOC0:.*]] = memref.clone
// CHECK: ^[[BB2]]:
// CHECK: %[[ALLOC1:.*]] = memref.alloc()
// CHECK-NEXT: test.region_buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOC1]]
// CHECK: %[[ALLOCA:.*]] = memref.alloca()
// CHECK-NEXT: test.buffer_based in(%[[ARG1]]{{.*}}out(%[[ALLOCA]]
// CHECK: %{{.*}} = math.exp
-// CHECK: %[[ALLOC2:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy
+// CHECK: %[[ALLOC2:.*]] = memref.clone %[[ALLOC1]]
// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
// CHECK: ^[[BB3:.*]]({{.*}}):
// CHECK: test.copy
-// CHECK-NEXT: dealloc
+// CHECK-NEXT: memref.dealloc
// -----
@@ -888,15 +867,13 @@ func @loop_alloc(
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
-// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc()
-// CHECK: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %arg3
// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: cmpi
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = memref.alloc()
-// CHECK: %[[ALLOC4:.*]] = memref.alloc()
-// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK: %[[ALLOC4:.*]] = memref.clone %[[ALLOC3]]
// CHECK: memref.dealloc %[[ALLOC3]]
// CHECK: scf.yield %[[ALLOC4]]
// CHECK: }
@@ -974,25 +951,21 @@ func @loop_nested_if_alloc(
}
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
-// CHECK: %[[ALLOC1:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %arg3
// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC:.*]] = %[[ALLOC1]]
// CHECK: memref.dealloc %[[IALLOC]]
// CHECK: %[[ALLOC3:.*]] = scf.if
// CHECK: %[[ALLOC4:.*]] = memref.alloc()
-// CHECK-NEXT: %[[ALLOC5:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]])
+// CHECK-NEXT: %[[ALLOC5:.*]] = memref.clone %[[ALLOC4]]
// CHECK-NEXT: memref.dealloc %[[ALLOC4]]
// CHECK-NEXT: scf.yield %[[ALLOC5]]
-// CHECK: %[[ALLOC6:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]])
+// CHECK: %[[ALLOC6:.*]] = memref.clone %[[ALLOC0]]
// CHECK-NEXT: scf.yield %[[ALLOC6]]
-// CHECK: %[[ALLOC7:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]])
+// CHECK: %[[ALLOC7:.*]] = memref.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
@@ -1040,17 +1013,14 @@ func @loop_nested_alloc(
// CHECK: %[[ALLOC0:.*]] = memref.alloc()
// CHECK-NEXT: memref.dealloc %[[ALLOC0]]
-// CHECK-NEXT: %[[ALLOC1:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]])
+// CHECK-NEXT: %[[ALLOC1:.*]] = memref.clone %arg3
// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC0:.*]] = %[[ALLOC1]])
-// CHECK: %[[ALLOC2:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]])
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.clone %[[IALLOC0]]
// CHECK-NEXT: memref.dealloc %[[IALLOC0]]
// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args
// CHECK-SAME: (%[[IALLOC1:.*]] = %[[ALLOC2]])
-// CHECK: %[[ALLOC5:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]])
+// CHECK-NEXT: %[[ALLOC5:.*]] = memref.clone %[[IALLOC1]]
// CHECK-NEXT: memref.dealloc %[[IALLOC1]]
// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args
@@ -1060,28 +1030,23 @@ func @loop_nested_alloc(
// CHECK: %[[ALLOC9:.*]] = scf.if
// CHECK: %[[ALLOC11:.*]] = memref.alloc()
-// CHECK-NEXT: %[[ALLOC12:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]])
+// CHECK-NEXT: %[[ALLOC12:.*]] = memref.clone %[[ALLOC11]]
// CHECK-NEXT: memref.dealloc %[[ALLOC11]]
// CHECK-NEXT: scf.yield %[[ALLOC12]]
-// CHECK: %[[ALLOC13:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]])
+// CHECK: %[[ALLOC13:.*]] = memref.clone %[[IALLOC2]]
// CHECK-NEXT: scf.yield %[[ALLOC13]]
// CHECK: memref.dealloc %[[IALLOC2]]
-// CHECK-NEXT: %[[ALLOC10:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]])
+// CHECK-NEXT: %[[ALLOC10:.*]] = memref.clone %[[ALLOC9]]
// CHECK-NEXT: memref.dealloc %[[ALLOC9]]
// CHECK-NEXT: scf.yield %[[ALLOC10]]
-// CHECK: %[[ALLOC7:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]])
+// CHECK: %[[ALLOC7:.*]] = memref.clone %[[ALLOC6]]
// CHECK-NEXT: memref.dealloc %[[ALLOC6]]
// CHECK-NEXT: scf.yield %[[ALLOC7]]
-// CHECK: %[[ALLOC4:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]])
+// CHECK: %[[ALLOC4:.*]] = memref.clone %[[ALLOC3]]
// CHECK-NEXT: memref.dealloc %[[ALLOC3]]
// CHECK-NEXT: scf.yield %[[ALLOC4]]
@@ -1183,8 +1148,7 @@ func @assumingOp(
// CHECK-NEXT: shape.assuming_yield %[[ARG1]]
// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[ARG0]]
// CHECK-NEXT: %[[TMP_ALLOC:.*]] = memref.alloc()
-// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[TMP_ALLOC]], %[[RETURNING_ALLOC]])
+// CHECK-NEXT: %[[RETURNING_ALLOC:.*]] = memref.clone %[[TMP_ALLOC]]
// CHECK-NEXT: memref.dealloc %[[TMP_ALLOC]]
// CHECK-NEXT: shape.assuming_yield %[[RETURNING_ALLOC]]
// CHECK: test.copy(%[[ASSUMING_RESULT:.*]], %[[ARG2]])
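The test updates above all follow one pattern: where BufferDeallocation previously materialized a fresh allocation plus a linalg.copy (preceded by memref.dim operations for dynamically shaped buffers), it now emits a single memref.clone. A sketch of the rewrite, with `%src` standing for an arbitrary source buffer and `%c0` an index constant:

```mlir
// Previously emitted:
%d = memref.dim %src, %c0 : memref<?xf32>
%copy = memref.alloc(%d) : memref<?xf32>
linalg.copy(%src, %copy) : memref<?xf32>, memref<?xf32>

// Now emitted instead:
%copy = memref.clone %src : memref<?xf32> to memref<?xf32>
```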
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir
index e1869ac58f524..e54135f21b4bd 100644
--- a/mlir/test/Transforms/canonicalize.mlir
+++ b/mlir/test/Transforms/canonicalize.mlir
@@ -1120,3 +1120,87 @@ func @fold_trunci_sexti(%arg0: i1) -> i1 attributes {} {
%1 = trunci %0 : i8 to i1
return %1 : i1
}
+
+// CHECK-LABEL: func @simple_clone_elimination
+func @simple_clone_elimination() -> memref<5xf32> {
+ %ret = memref.alloc() : memref<5xf32>
+ %temp = memref.clone %ret : memref<5xf32> to memref<5xf32>
+ memref.dealloc %temp : memref<5xf32>
+ return %ret : memref<5xf32>
+}
+// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
+// CHECK-NOT: %[[temp:.*]] = memref.clone
+// CHECK-NOT: memref.dealloc %[[temp]]
+// CHECK: return %[[ret]]
+
+// -----
+
+// CHECK-LABEL: func @clone_loop_alloc
+func @clone_loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
+ %0 = memref.alloc() : memref<2xf32>
+ memref.dealloc %0 : memref<2xf32>
+ %1 = memref.clone %arg3 : memref<2xf32> to memref<2xf32>
+ %2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) {
+ %3 = cmpi eq, %arg5, %arg1 : index
+ memref.dealloc %arg6 : memref<2xf32>
+ %4 = memref.alloc() : memref<2xf32>
+ %5 = memref.clone %4 : memref<2xf32> to memref<2xf32>
+ memref.dealloc %4 : memref<2xf32>
+ %6 = memref.clone %5 : memref<2xf32> to memref<2xf32>
+ memref.dealloc %5 : memref<2xf32>
+ scf.yield %6 : memref<2xf32>
+ }
+ linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32>
+ memref.dealloc %2 : memref<2xf32>
+ return
+}
+
+// CHECK-NEXT: %[[ALLOC0:.*]] = memref.clone
+// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for
+// CHECK-NEXT: memref.dealloc
+// CHECK-NEXT: %[[ALLOC2:.*]] = memref.alloc
+// CHECK-NEXT: scf.yield %[[ALLOC2]]
+// CHECK: linalg.copy(%[[ALLOC1]]
+// CHECK-NEXT: memref.dealloc %[[ALLOC1]]
+
+// -----
+
+// CHECK-LABEL: func @clone_nested_region
+func @clone_nested_region(%arg0: index, %arg1: index) -> memref<?x?xf32> {
+ %0 = cmpi eq, %arg0, %arg1 : index
+ %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
+ %2 = scf.if %0 -> (memref<?x?xf32>) {
+ %3 = scf.if %0 -> (memref<?x?xf32>) {
+ %9 = memref.clone %1 : memref<?x?xf32> to memref<?x?xf32>
+ scf.yield %9 : memref<?x?xf32>
+ } else {
+ %7 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
+ %10 = memref.clone %7 : memref<?x?xf32> to memref<?x?xf32>
+ memref.dealloc %7 : memref<?x?xf32>
+ scf.yield %10 : memref<?x?xf32>
+ }
+ %6 = memref.clone %3 : memref<?x?xf32> to memref<?x?xf32>
+ memref.dealloc %3 : memref<?x?xf32>
+ scf.yield %6 : memref<?x?xf32>
+ } else {
+ %3 = memref.alloc(%arg1, %arg1) : memref<?x?xf32>
+ %6 = memref.clone %3 : memref<?x?xf32> to memref<?x?xf32>
+ memref.dealloc %3 : memref<?x?xf32>
+ scf.yield %6 : memref<?x?xf32>
+ }
+ memref.dealloc %1 : memref<?x?xf32>
+ return %2 : memref<?x?xf32>
+}
+
+// CHECK: %[[ALLOC1:.*]] = memref.alloc
+// CHECK-NEXT: %[[ALLOC2:.*]] = scf.if
+// CHECK-NEXT: %[[ALLOC3_1:.*]] = scf.if
+// CHECK-NEXT: %[[ALLOC4_1:.*]] = memref.clone %[[ALLOC1]]
+// CHECK-NEXT: scf.yield %[[ALLOC4_1]]
+// CHECK: %[[ALLOC4_2:.*]] = memref.alloc
+// CHECK-NEXT: scf.yield %[[ALLOC4_2]]
+// CHECK: scf.yield %[[ALLOC3_1]]
+// CHECK: %[[ALLOC3_2:.*]] = memref.alloc
+// CHECK-NEXT: scf.yield %[[ALLOC3_2]]
+// CHECK: memref.dealloc %[[ALLOC1]]
+// CHECK-NEXT: return %[[ALLOC2]]
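The canonicalization these new tests exercise folds a clone that is immediately deallocated: the clone/dealloc pair disappears and any uses of the clone are redirected to the source, as in this minimal form taken from `@simple_clone_elimination`:

```mlir
%ret = memref.alloc() : memref<5xf32>
%temp = memref.clone %ret : memref<5xf32> to memref<5xf32>
memref.dealloc %temp : memref<5xf32>
// Canonicalizes to just the memref.alloc; %temp and its dealloc vanish.
```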
diff --git a/mlir/test/Transforms/copy-removal.mlir b/mlir/test/Transforms/copy-removal.mlir
deleted file mode 100644
index a91c5c2b95287..0000000000000
--- a/mlir/test/Transforms/copy-removal.mlir
+++ /dev/null
@@ -1,361 +0,0 @@
-// RUN: mlir-opt -copy-removal -split-input-file %s | FileCheck %s
-
-// All linalg copies except linalg.copy(%1, %9) must be removed; that one stays
-// because the defining operation of %1 and its DeallocOp are in another block.
-
-// CHECK-LABEL: func @nested_region_control_flow_div_nested
-func @nested_region_control_flow_div_nested(%arg0: index, %arg1: index) -> memref<?x?xf32> {
- %0 = cmpi eq, %arg0, %arg1 : index
- %1 = memref.alloc(%arg0, %arg0) : memref<?x?xf32>
- // CHECK: %{{.*}} = scf.if
- %2 = scf.if %0 -> (memref<?x?xf32>) {
- // CHECK: %[[PERCENT3:.*]] = scf.if
- %3 = scf.if %0 -> (memref<?x?xf32>) {
- %c0_0 = constant 0 : index
- %7 = memref.dim %1, %c0_0 : memref<?x?xf32>
- %c1_1 = constant 1 : index
- %8 = memref.dim %1, %c1_1 : memref<?x?xf32>
- %9 = memref.alloc(%7, %8) : memref<?x?xf32>
- // CHECK: linalg.copy({{.*}}, %[[PERCENT9:.*]])
- linalg.copy(%1, %9) : memref<?x?xf32>, memref<?x?xf32>
- // CHECK: scf.yield %[[PERCENT9]]
- scf.yield %9 : memref<?x?xf32>
- } else {
- // CHECK: %[[PERCENT7:.*]] = memref.alloc
- %7 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
- %c0_0 = constant 0 : index
- %8 = memref.dim %7, %c0_0 : memref<?x?xf32>
- %c1_1 = constant 1 : index
- %9 = memref.dim %7, %c1_1 : memref<?x?xf32>
- // CHECK-NOT: %{{.*}} = memref.alloc
- // CHECK-NOT: linalg.copy(%[[PERCENT7]], %{{.*}})
- // CHECK-NOT: memref.dealloc %[[PERCENT7]]
- %10 = memref.alloc(%8, %9) : memref<?x?xf32>
- linalg.copy(%7, %10) : memref<?x?xf32>, memref<?x?xf32>
- memref.dealloc %7 : memref<?x?xf32>
- // CHECK: scf.yield %[[PERCENT7]]
- scf.yield %10 : memref<?x?xf32>
- }
- %c0 = constant 0 : index
- %4 = memref.dim %3, %c0 : memref<?x?xf32>
- %c1 = constant 1 : index
- %5 = memref.dim %3, %c1 : memref<?x?xf32>
- // CHECK-NOT: %{{.*}} = memref.alloc
- // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}})
- // CHECK-NOT: memref.dealloc %[[PERCENT3]]
- %6 = memref.alloc(%4, %5) : memref<?x?xf32>
- linalg.copy(%3, %6) : memref<?x?xf32>, memref<?x?xf32>
- memref.dealloc %3 : memref<?x?xf32>
- // CHECK: scf.yield %[[PERCENT3]]
- scf.yield %6 : memref<?x?xf32>
- } else {
- // CHECK: %[[PERCENT3:.*]] = memref.alloc
- %3 = memref.alloc(%arg1, %arg1) : memref<?x?xf32>
- %c0 = constant 0 : index
- %4 = memref.dim %3, %c0 : memref<?x?xf32>
- %c1 = constant 1 : index
- %5 = memref.dim %3, %c1 : memref<?x?xf32>
- // CHECK-NOT: %{{.*}} = memref.alloc
- // CHECK-NOT: linalg.copy(%[[PERCENT3]], %{{.*}})
- // CHECK-NOT: memref.dealloc %[[PERCENT3]]
- %6 = memref.alloc(%4, %5) : memref<?x?xf32>
- linalg.copy(%3, %6) : memref<?x?xf32>, memref<?x?xf32>
- memref.dealloc %3 : memref<?x?xf32>
- // CHECK: scf.yield %[[PERCENT3]]
- scf.yield %6 : memref<?x?xf32>
- }
- memref.dealloc %1 : memref<?x?xf32>
- return %2 : memref<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @simple_test
-func @simple_test() -> memref<5xf32> {
- %temp = memref.alloc() : memref<5xf32>
- %ret = memref.alloc() : memref<5xf32>
- linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
- memref.dealloc %ret : memref<5xf32>
- return %temp : memref<5xf32>
-}
-// CHECK-SAME: () -> memref<5xf32>
-// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
-// CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
-// CHECK-NOT: memref.dealloc %[[ret]]
-// CHECK: return %[[ret]]
-
-// -----
-
-// It is legal to remove the copy operation even though %ret has a use before
-// the copy operation. The allocation of %temp and the deallocation of %ret
-// should also be removed.
-
-// CHECK-LABEL: func @test_with_ret_usage_before_copy
-func @test_with_ret_usage_before_copy() -> memref<5xf32> {
- %ret = memref.alloc() : memref<5xf32>
- %temp = memref.alloc() : memref<5xf32>
- %c0 = constant 0 : index
- %dimension = memref.dim %ret, %c0 : memref<5xf32>
- linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
- memref.dealloc %ret : memref<5xf32>
- return %temp : memref<5xf32>
-}
-// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
-// CHECK-NOT: %{{.*}} = memref.alloc
-// CHECK-NEXT: %{{.*}} = constant
-// CHECK-NEXT: %[[DIM:.*]] = memref.dim %[[ret]]
-// CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
-// CHECK-NOT: memref.dealloc %[[ret]]
-// CHECK: return %[[ret]]
-
-// -----
-
-// It is illegal to remove the copy operation because %ret has a use after the
-// copy operation.
-
-// CHECK-LABEL: func @test_with_ret_usage_after_copy
-func @test_with_ret_usage_after_copy() -> memref<5xf32> {
- %ret = memref.alloc() : memref<5xf32>
- %temp = memref.alloc() : memref<5xf32>
- // CHECK: linalg.copy
- linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
- %c0 = constant 0 : index
- %dimension = memref.dim %ret, %c0 : memref<5xf32>
- memref.dealloc %ret : memref<5xf32>
- return %temp : memref<5xf32>
-}
-
-// -----
-
-// It is illegal to remove the copy operation because %temp has a use before
-// the copy operation.
-
-// CHECK-LABEL: func @test_with_temp_usage_before_copy
-func @test_with_temp_usage_before_copy() -> memref<5xf32> {
- %ret = memref.alloc() : memref<5xf32>
- %temp = memref.alloc() : memref<5xf32>
- %c0 = constant 0 : index
- %dimension = memref.dim %temp, %c0 : memref<5xf32>
- // CHECK: linalg.copy
- linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
- memref.dealloc %ret : memref<5xf32>
- return %temp : memref<5xf32>
-}
-
-// -----
-
-// It would be legal to remove the copy operation even though %temp has a use
-// after the copy operation. The allocation of %temp and the deallocation of
-// %ret could then also be removed.
-
-// However the following pattern is not handled by copy removal.
-// %from = memref.alloc()
-// %to = memref.alloc()
-// copy(%from, %to)
-// read_from(%from) + write_to(%something_else)
-// memref.dealloc(%from)
-// return %to
-// In particular, linalg.generic is an op with memory effects sitting between
-// the copy and the dealloc. Since no alias analysis is performed and no
-// distinction is made between reads and writes, it blocks copy removal.
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @test_with_temp_usage_after_copy
-func @test_with_temp_usage_after_copy() -> memref<5xf32> {
- %ret = memref.alloc() : memref<5xf32>
- %res = memref.alloc() : memref<5xf32>
- %temp = memref.alloc() : memref<5xf32>
- linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
- linalg.generic {
- indexing_maps = [#map0, #map0],
- iterator_types = ["parallel"]}
- ins(%temp : memref<5xf32>)
- outs(%res : memref<5xf32>) {
- ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
- %tmp1 = math.exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- }
- memref.dealloc %ret : memref<5xf32>
- return %temp : memref<5xf32>
-}
-// CHECK-NEXT: %[[ret:.*]] = memref.alloc()
-// CHECK-NEXT: %[[res:.*]] = memref.alloc()
-// CHECK-NEXT: %[[temp:.*]] = memref.alloc()
-// CHECK-NEXT: linalg.copy(%[[ret]], %[[temp]])
-// CHECK-NEXT: linalg.generic
-// CHECK: memref.dealloc %[[ret]]
-// CHECK: return %[[temp]]
-
-// -----
-
-// CHECK-LABEL: func @make_allocation
-func @make_allocation() -> memref<5xf32> {
- %mem = memref.alloc() : memref<5xf32>
- return %mem : memref<5xf32>
-}
-
-// CHECK-LABEL: func @test_with_function_call
-func @test_with_function_call() -> memref<5xf32> {
- // CHECK-NEXT: %[[ret:.*]] = call @make_allocation() : () -> memref<5xf32>
- %ret = call @make_allocation() : () -> (memref<5xf32>)
- // CHECK-NOT: %{{.*}} = memref.alloc
- // CHECK-NOT: linalg.copy(%[[ret]], %{{.*}})
- // CHECK-NOT: memref.dealloc %[[ret]]
- %temp = memref.alloc() : memref<5xf32>
- linalg.copy(%ret, %temp) : memref<5xf32>, memref<5xf32>
- memref.dealloc %ret : memref<5xf32>
- // CHECK: return %[[ret]]
- return %temp : memref<5xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @multiple_deallocs_in_different_blocks
-func @multiple_deallocs_in_different_blocks(%cond : i1) -> memref<5xf32> {
- // CHECK-NEXT: %[[PERCENT0:.*]] = memref.alloc()
- %0 = memref.alloc() : memref<5xf32>
- cond_br %cond, ^bb1, ^bb2
-^bb1:
- memref.dealloc %0 : memref<5xf32>
- // CHECK: br ^[[BB3:.*]](%[[PERCENT0]]
- br ^bb3(%0 : memref<5xf32>)
-^bb2:
- // CHECK-NOT: %{{.*}} = memref.alloc
- // CHECK-NOT: linalg.copy(%[[PERCENT0]], %{{.*}})
- // CHECK-NOT: memref.dealloc %[[PERCENT0]]
- %temp = memref.alloc() : memref<5xf32>
- linalg.copy(%0, %temp) : memref<5xf32>, memref<5xf32>
- memref.dealloc %0 : memref<5xf32>
- // CHECK: br ^[[BB3]](%[[PERCENT0]]
- br ^bb3(%temp : memref<5xf32>)
-^bb3(%res : memref<5xf32>):
- return %res : memref<5xf32>
-}
-
-// -----
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @test_ReuseCopyTargetAsSource
-func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>, %result: memref<2xf32>){
- // CHECK-SAME: (%[[ARG0:.*]]: memref<2xf32>, %[[RES:.*]]: memref<2xf32>)
- // CHECK-NOT: %{{.*}} = memref.alloc
- %temp = memref.alloc() : memref<2xf32>
- // CHECK-NEXT: linalg.generic
- // CHECK-SAME: ins(%[[ARG0]]{{.*}}outs(%[[RES]]
- // CHECK-NOT: linalg.copy(%{{.*}}, %[[RES]])
- // CHECK-NOT: memref.dealloc %{{.*}}
- linalg.generic {
- indexing_maps = [#map0, #map0],
- iterator_types = ["parallel"]}
- ins(%arg0 : memref<2xf32>)
- outs(%temp : memref<2xf32>) {
- ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
- %tmp2 = math.exp %gen2_arg0 : f32
- linalg.yield %tmp2 : f32
- }
- linalg.copy(%temp, %result) : memref<2xf32>, memref<2xf32>
- memref.dealloc %temp : memref<2xf32>
- // CHECK: return
- return
-}
-
-// -----
-
-// Copy operation must not be removed since an operation writes to %to value
-// before copy.
-
-#map0 = affine_map<(d0) -> (d0)>
-
-// CHECK-LABEL: func @test_ReuseCopyTargetAsSource
-func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>){
- %to = memref.alloc() : memref<2xf32>
- %temp = memref.alloc() : memref<2xf32>
- linalg.generic {
- indexing_maps = [#map0, #map0],
- iterator_types = ["parallel"]}
- ins(%arg0 : memref<2xf32>)
- outs(%temp : memref<2xf32>) {
- ^bb0(%gen1_arg0: f32, %gen1_arg1: f32):
- %tmp1 = math.exp %gen1_arg0 : f32
- linalg.yield %tmp1 : f32
- }
- linalg.generic {
- indexing_maps = [#map0, #map0],
- iterator_types = ["parallel"]}
- ins(%arg0 : memref<2xf32>)
- outs(%to : memref<2xf32>) {
- ^bb0(%gen2_arg0: f32, %gen2_arg1: f32):
- %tmp2 = math.exp %gen2_arg0 : f32
- linalg.yield %tmp2 : f32
- }
- // CHECK: linalg.copy
- linalg.copy(%temp, %to) : memref<2xf32>, memref<2xf32>
- memref.dealloc %temp : memref<2xf32>
- return
-}
-
-// -----
-
-// The only redundant copy is linalg.copy(%4, %5).
-
-// CHECK-LABEL: func @loop_alloc
-func @loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) {
- // CHECK: %{{.*}} = memref.alloc()
- %0 = memref.alloc() : memref<2xf32>
- memref.dealloc %0 : memref<2xf32>
- // CHECK: %{{.*}} = memref.alloc()
- %1 = memref.alloc() : memref<2xf32>
- // CHECK: linalg.copy
- linalg.copy(%arg3, %1) : memref<2xf32>, memref<2xf32>
- %2 = scf.for %arg5 = %arg0 to %arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) {
- %3 = cmpi eq, %arg5, %arg1 : index
- // CHECK: memref.dealloc
- memref.dealloc %arg6 : memref<2xf32>
- // CHECK: %[[PERCENT4:.*]] = memref.alloc()
- %4 = memref.alloc() : memref<2xf32>
- // CHECK-NOT: memref.alloc
- // CHECK-NOT: linalg.copy
- // CHECK-NOT: memref.dealloc
- %5 = memref.alloc() : memref<2xf32>
- linalg.copy(%4, %5) : memref<2xf32>, memref<2xf32>
- memref.dealloc %4 : memref<2xf32>
- // CHECK: %[[PERCENT6:.*]] = memref.alloc()
- %6 = memref.alloc() : memref<2xf32>
- // CHECK: linalg.copy(%[[PERCENT4]], %[[PERCENT6]])
- linalg.copy(%5, %6) : memref<2xf32>, memref<2xf32>
- scf.yield %6 : memref<2xf32>
- }
- // CHECK: linalg.copy
- linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32>
- memref.dealloc %2 : memref<2xf32>
- return
-}
-
-// -----
-
-// The linalg.copy operation can be removed, along with the corresponding
-// alloc and dealloc operations. All uses of %0 are then replaced with %arg2.
-
-// CHECK-LABEL: func @check_with_affine_dialect
-func @check_with_affine_dialect(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) {
- // CHECK-SAME: (%[[ARG0:.*]]: memref<4xf32>, %[[ARG1:.*]]: memref<4xf32>, %[[RES:.*]]: memref<4xf32>)
- // CHECK-NOT: memref.alloc
- %0 = memref.alloc() : memref<4xf32>
- affine.for %arg3 = 0 to 4 {
- %5 = affine.load %arg0[%arg3] : memref<4xf32>
- %6 = affine.load %arg1[%arg3] : memref<4xf32>
- %7 = cmpf ogt, %5, %6 : f32
- // CHECK: %[[SELECT_RES:.*]] = select
- %8 = select %7, %5, %6 : f32
- // CHECK-NEXT: affine.store %[[SELECT_RES]], %[[RES]]
- affine.store %8, %0[%arg3] : memref<4xf32>
- }
- // CHECK-NOT: linalg.copy
- // CHECK-NOT: dealloc
- linalg.copy(%0, %arg2) : memref<4xf32>, memref<4xf32>
- memref.dealloc %0 : memref<4xf32>
- // CHECK: return
- return
-}