[Mlir-commits] [mlir] 94c0477 - [mlir][vector] Prevent incorrect vector.transfer_{read|write} hoisting (#66930)
llvmlistbot at llvm.org
Fri Sep 29 07:34:41 PDT 2023
Author: Andrzej Warzyński
Date: 2023-09-29T15:34:37+01:00
New Revision: 94c04772bc2c2e4f266c66bd7b8aa4f5075469be
URL: https://github.com/llvm/llvm-project/commit/94c04772bc2c2e4f266c66bd7b8aa4f5075469be
DIFF: https://github.com/llvm/llvm-project/commit/94c04772bc2c2e4f266c66bd7b8aa4f5075469be.diff
LOG: [mlir][vector] Prevent incorrect vector.transfer_{read|write} hoisting (#66930)
At the moment, `hoistRedundantVectorTransfers` would hoist the
`vector.transfer_read`/`vector.transfer_write` pair in this function:
```mlir
func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4 = arith.constant 4 : index
%c20 = arith.constant 20 : index
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32>
%cast = memref.cast %alloca : memref<1x1x2xi32> to memref<1x1x2xi32>
%collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
scf.for %_ = %c0 to %c20 step %c4 {
%collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
%lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
%acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
%op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32
vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
}
return
}
```
as follows:
```mlir
func.func @no_hoisting_write_to_memref(%arg0: i32, %arg1: vector<1xi32>) {
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c20 = arith.constant 20 : index
%alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32>
%collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
%collapse_shape_0 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
%0 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
%1 = vector.transfer_read %collapse_shape_0[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
%2 = scf.for %arg2 = %c0 to %c20 step %c4 iter_args(%arg3 = %0) -> (vector<1xi32>) {
%3 = vector.outerproduct %arg3, %arg0, %1 {kind = #vector.kind<add>} : vector<1xi32>, i32
scf.yield %3 : vector<1xi32>
}
vector.transfer_write %2, %collapse_shape[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
return
}
```
This is not safe. While one argument of `vector.outerproduct` (`%lhs` from the
original loop) is correctly forwarded via `iter_args`, the other one (`%acc`
from the original loop) is not: after hoisting, `%1` is read only once, before
the loop, whereas `%acc` was re-read in every iteration and observed the
previous iteration's `vector.transfer_write` through the aliasing `memref`.
This patch disables hoisting in cases where the source of "candidate"
`vector.transfer_read` aliases with some other `memref`. A more generic
approach would be to make sure that all values are correctly forwarded
via `iter_args`, but that would require involving alias analysis.
[1] Based on https://github.com/openxla/iree/issues/14994.
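In essence, the patch adds a conservative bail-out: if the source of either
the `vector.transfer_read` or the `vector.transfer_write` is produced by a
view-like op, the pair is not hoisted. A minimal sketch of that check using
MLIR's C++ API (the helper name `isViewLikeSource` is illustrative only; the
actual change is in the diff below):
```c++
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "mlir/Support/LLVM.h"

using namespace mlir;

// Illustrative helper: a transfer source produced by a view-like op
// (e.g. memref.subview, memref.collapse_shape, memref.cast) may alias
// another memref, so hoisting such a pair cannot be proven safe here.
static bool isViewLikeSource(Value source) {
  return isa_and_nonnull<ViewLikeOpInterface>(source.getDefiningOp());
}
```
Called on both `transferRead.getSource()` and `transferWrite.getSource()`, a
`true` result makes the walk skip the pair via `WalkResult::advance()`. This
is deliberately conservative: it rejects some hoists that would in fact be
safe, trading missed optimizations for correctness.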
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
mlir/test/Dialect/Linalg/hoisting.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
index 24cb754d65e16cd..d4444c3f869e5cc 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h
@@ -28,11 +28,19 @@ namespace linalg {
/// 3. No uses of the memref either dominate the transfer_read or are
/// dominated by the transfer_write (i.e. no aliasing between the write and
/// the read across the loop)
+/// 4. The source operands for vector.transfer_{read|write} do not originate
+/// from Ops implementing ViewLikeOpInterface (to reduce the risk of
+/// aliasing).
/// To improve hoisting opportunities, call the `moveLoopInvariantCode` helper
/// function on the candidate loop above which to hoist. Hoisting the transfers
/// results in scf::ForOp yielding the value that originally transited through
/// memory.
///
+/// TODO: To further improve hoisting opportunities, fold aliasing memref
+/// operations into respective vector.transfer_{read|write} operations and
+/// avoid using ops implementing ViewLikeOpInterface as the source for transfer
+/// Ops.
+///
/// WARNING: This hoisting does not model parallelism and is generally incorrect
/// when used on distributed loops with memref semantics!
void hoistRedundantVectorTransfers(func::FuncOp func);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 1c68ca49725effb..221bec713b38aa3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -55,11 +55,12 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
LoopLikeOpInterface loop) {
Value source = transferRead.getSource();
- // Skip subview and collapse_shape Ops
- while (auto subView = source.getDefiningOp<memref::SubViewOp>())
- source = subView.getSource();
- while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
- source = collapsed->getOperand(0);
+
+ // Skip view-like Ops and retrieve the actual source Operation
+ while (auto srcOp =
+ dyn_cast_or_null<ViewLikeOpInterface>(source.getDefiningOp()))
+ source = srcOp.getViewSource();
+
llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
source.getUsers().end());
llvm::SmallDenseSet<Operation *, 32> processed;
@@ -68,12 +69,8 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
// If the user has already been processed skip.
if (!processed.insert(user).second)
continue;
- if (auto subView = dyn_cast<memref::SubViewOp>(user)) {
- users.append(subView->getUsers().begin(), subView->getUsers().end());
- continue;
- }
- if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
- users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
+ if (auto viewLike = dyn_cast<ViewLikeOpInterface>(user)) {
+ users.append(viewLike->getUsers().begin(), viewLike->getUsers().end());
continue;
}
if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
@@ -144,7 +141,9 @@ void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) {
// Approximate aliasing by checking that:
// 1. indices, vector type and permutation map are the same (i.e., the
// transfer_read/transfer_write ops are matching),
- // 2. no other operations in the loop access the same memref except
+ // 2. source operands for vector.transfer_{read|write} do not originate from
+ // Ops implementing ViewLikeOpInterface.
+ // 3. no other operations in the loop access the same memref except
// for transfer_read/transfer_write accessing statically disjoint
// slices.
if (transferRead.getIndices() != transferWrite.getIndices() ||
@@ -152,6 +151,14 @@ void mlir::linalg::hoistRedundantVectorTransfers(func::FuncOp func) {
transferRead.getPermutationMap() != transferWrite.getPermutationMap())
return WalkResult::advance();
+ auto *source = transferRead.getSource().getDefiningOp();
+ if (source && isa_and_nonnull<ViewLikeOpInterface>(source))
+ return WalkResult::advance();
+
+ source = transferWrite.getSource().getDefiningOp();
+ if (source && isa_and_nonnull<ViewLikeOpInterface>(source))
+ return WalkResult::advance();
+
// TODO: may want to memoize this information for performance but it
// likely gets invalidated often.
DominanceInfo dom(loop);
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index e25914620726b9b..7d0c3648c344b1d 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -765,10 +765,10 @@ transform.sequence failures(propagate) {
// CHECK-LABEL: func.func @no_hoisting_collapse_shape
// CHECK: scf.for {{.*}} {
-// CHECK: vector.transfer_write
-// CHECK: vector.transfer_read
-// CHECK: vector.transfer_write
-// CHECK: }
+// CHECK: vector.transfer_write {{.*}} : vector<4xi32>, memref<4xi32>
+// CHECK-NEXT: vector.transfer_read {{.*}} : memref<1x4x1xi32>, vector<1x4x1xi32>
+// CHECK-NEXT: vector.transfer_write {{.*}} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+// CHECK-NEXT: }
func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
%c0_i32 = arith.constant 0 : i32
@@ -827,3 +827,48 @@ transform.sequence failures(propagate) {
transform.structured.hoist_redundant_vector_transfers %0
: (!transform.any_op) -> !transform.any_op
}
+
+// -----
+
+// Regression test - hoisting the following `vector.transfer_{read|write}` pair
+// would not be safe:
+// %lhs = vector.transfer_read %collapsed_1[%c0]
+// vector.transfer_write %op, %collapsed_1[%c0]
+// That's because the following `vector.transfer_read` reads from the same
+// memory (i.e. `%collapsed_1` and `%collapsed_2` alias):
+// %acc = vector.transfer_read %collapsed_2[%c0]
+
+// CHECK-LABEL: func.func @no_hoisting_write_to_memref
+// CHECK: scf.for {{.*}} {
+// CHECK: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
+// CHECK-NEXT: vector.transfer_read {{.*}} : memref<2xi32>, vector<1xi32>
+// CHECK-NEXT: vector.outerproduct {{.*}} : vector<1xi32>, i32
+// CHECK-NEXT: vector.transfer_write {{.*}} : vector<1xi32>, memref<2xi32>
+// CHECK-NEXT: }
+
+func.func @no_hoisting_write_to_memref(%rhs: i32, %arg1: vector<1xi32>) {
+ %c0_i32 = arith.constant 0 : i32
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c4 = arith.constant 4 : index
+ %c20 = arith.constant 20 : index
+ %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x1x2xi32>
+ %cast = memref.cast %alloca : memref<1x1x2xi32> to memref<1x1x2xi32>
+ %collapsed_1 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
+ scf.for %_ = %c0 to %c20 step %c4 {
+ %collapsed_2 = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x1x2xi32> into memref<2xi32>
+ %lhs = vector.transfer_read %collapsed_1[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
+ %acc = vector.transfer_read %collapsed_2[%c0], %c0_i32 {in_bounds = [true]} : memref<2xi32>, vector<1xi32>
+ %op = vector.outerproduct %lhs, %rhs, %acc {kind = #vector.kind<add>} : vector<1xi32>, i32
+ vector.transfer_write %op, %collapsed_1[%c0] {in_bounds = [true]} : vector<1xi32>, memref<2xi32>
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["func.func"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.structured.hoist_redundant_vector_transfers %0
+ : (!transform.any_op) -> !transform.any_op
+}