[Mlir-commits] [mlir] [mlir][vector] Refine vector.transfer_read hoisting/forwarding (PR #65770)

Tue Sep 12 02:32:56 PDT 2023

https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/65770:

>From 235fcd92899bad6782f377c61c67dd81d42c7146 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 8 Sep 2023 15:24:02 +0000
Subject: [PATCH 1/3] [mlir][vector] Refine vector.transfer_read
 hoisting/forwarding

Make sure that when analysing a `vector.transfer_read` that's a candidate
for either hoisting or store-to-load forwarding, memref.collapse_shape
Ops are correctly included in the alias analysis. This is done by
either:
  *  making sure that relevant users are taken into account, or
  *  source Ops are correctly identified (i.e. `memref.collapse_shape`
     is not really the true source).

Note that this won't fix aliasing for other cases (e.g. with
memref.expand_shape). Ideally, we should query an alias analysis in e.g.
`noAliasingUseInLoop` instead of hard-coding ops
---
 .../Dialect/Linalg/Transforms/Hoisting.cpp    |  6 ++
 .../Transforms/VectorTransferOpTransforms.cpp |  4 ++
 mlir/test/Dialect/Linalg/hoisting.mlir        | 71 +++++++++++++++++++
 .../Dialect/Vector/vector-transferop-opt.mlir | 39 +++++++++-
 4 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 5f20ea42e499241..6f5e0e54509caba 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -57,6 +57,8 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
   Value source = transferRead.getSource();
   while (auto subView = source.getDefiningOp<memref::SubViewOp>())
     source = subView.getSource();
+  while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
+    source = collapsed->getOperand(0);
   llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
                                            source.getUsers().end());
   llvm::SmallDenseSet<Operation *, 32> processed;
@@ -69,6 +71,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
       users.append(subView->getUsers().begin(), subView->getUsers().end());
       continue;
     }
+    if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
+      users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
+      continue;
+    }
     if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
       continue;
     if (!loop->isAncestor(user))
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
index 74d4b7636315fd5..f715c543eb17955 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
@@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
       users.append(subView->getUsers().begin(), subView->getUsers().end());
       continue;
     }
+    if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
+      users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
+      continue;
+    }
     if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
       continue;
     if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index 5b1bb3fc15e09ea..e25914620726b9b 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -756,3 +756,74 @@ transform.sequence failures(propagate) {
   transform.structured.hoist_redundant_vector_transfers %0
     : (!transform.any_op) -> !transform.any_op
 }
+
+// -----
+
+// Regression test - `vector.transfer_read` below should not be hoisted.
+// Indeed, %collapse_shape (written to by `vector.transfer_write`) and %alloca
+// (read by `vector.transfer_read`) alias.
+
+// CHECK-LABEL:  func.func @no_hoisting_collapse_shape
+//       CHECK:    scf.for {{.*}} {
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_read
+//       CHECK:      vector.transfer_write
+//       CHECK:    }
+
+func.func @no_hoisting_collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c20 = arith.constant 20 : index
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
+  scf.for %arg0 = %c0 to %c20 step %c4 {
+    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+    %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
+    vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
+    %read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
+    vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["func.func"]} in %arg1
+    : (!transform.any_op) -> !transform.any_op
+  transform.structured.hoist_redundant_vector_transfers %0
+    : (!transform.any_op) -> !transform.any_op
+}
+
+// -----
+
+// Regression test - `vector.transfer_read` below should not be hoisted.
+// Indeed, %collapse_shape (read by `vector.transfer_read`) and %alloca
+// (written to by `vector.transfer_write`) alias.
+
+// CHECK-LABEL:  func.func @no_hoisting_collapse_shape_2
+//       CHECK:    scf.for {{.*}} {
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_read
+
+func.func @no_hoisting_collapse_shape_2(%vec: vector<1x12x1xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c20 = arith.constant 20 : index
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
+  scf.for %arg0 = %c0 to %c20 step %c4 {
+    %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
+    vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
+    %read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
+    "prevent.dce"(%read) : (vector<12xi32>) ->()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["func.func"]} in %arg1
+    : (!transform.any_op) -> !transform.any_op
+  transform.structured.hoist_redundant_vector_transfers %0
+    : (!transform.any_op) -> !transform.any_op
+}
diff --git a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
index af4e7d86f62e372..0c460b24d8258ea 100644
--- a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
+++ b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
@@ -215,4 +215,41 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
   vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
     vector<1x4xf32>, memref<4x4xf32>
   return %0 : vector<1x4xf32>
-}
\ No newline at end of file
+}
+
+
+// Regression test - the following _potential forwarding_ of %1 to the final
+// `vector.transfer_write` would not be safe:
+//         %1 = vector.transfer_read %subview
+//         vector.transfer_write %1, %alloca
+//         vector.transfer_write %vec, %collapse_shape 
+//         vector.transfer_write %1, %subview
+// Indeed, %alloca and %collapse_shape alias and hence %2 != %1.
+
+// CHECK-LABEL:  func.func @collapse_shape
+//       CHECK:    scf.for {{.*}} {
+//       CHECK:      vector.transfer_read
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_read
+//       CHECK:      vector.transfer_write
+
+func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c20 = arith.constant 20 : index
+
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
+  %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
+  scf.for %arg0 = %c0 to %c20 step %c4 {
+    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+    %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
+    // $alloca and $collapse_shape alias
+    vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
+    vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
+    %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
+    vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+  }
+  return
+}

>From bb31a729a05ac896e557a39199a1f88be101253f Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Mon, 11 Sep 2023 09:28:21 +0000
Subject: [PATCH 2/3] fixup! [mlir][vector] Refine vector.transfer_read
 hoisting/forwarding

---
 mlir/test/Dialect/Vector/vector-transferop-opt.mlir | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
index 0c460b24d8258ea..f43367ab4aeba7d 100644
--- a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
+++ b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
@@ -223,8 +223,11 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
 //         %1 = vector.transfer_read %subview
 //         vector.transfer_write %1, %alloca
 //         vector.transfer_write %vec, %collapse_shape 
+//         %2 = vector.transfer_read %alloca
 //         vector.transfer_write %1, %subview
-// Indeed, %alloca and %collapse_shape alias and hence %2 != %1.
+// Indeed, %alloca and %collapse_shape alias and hence %2 != %1. Instead, the
+// final `vector.transfer_write` should be preserved as:
+//         vector.transfer_write %2, %subview
 
 // CHECK-LABEL:  func.func @collapse_shape
 //       CHECK:    scf.for {{.*}} {

>From d91aee84256c87d6455f7531d4604ba5b5d34cc1 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Tue, 12 Sep 2023 09:25:52 +0000
Subject: [PATCH 3/3] fixup! [mlir][vector] Refine vector.transfer_read
 hoisting/forwarding

---
 mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 6f5e0e54509caba..2ee21099cfb14c7 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -55,6 +55,7 @@ void mlir::linalg::hoistRedundantVectorTransfersOnTensor(func::FuncOp func) {
 static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
                                 LoopLikeOpInterface loop) {
   Value source = transferRead.getSource();
+  // Skip subview and collapse_shape Ops
   while (auto subView = source.getDefiningOp<memref::SubViewOp>())
     source = subView.getSource();
   while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())