[Mlir-commits] [mlir] [mlir][vector] Refine vector.transfer_read hoisting (PR #65770)

Fri Sep 8 12:08:00 PDT 2023

https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/65770:

>From 06a0d866c30e134e2fa3edf2dacc5e3f35669a77 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 8 Sep 2023 15:24:02 +0000
Subject: [PATCH 1/3] [mlir][vector] Refine vector.transfer_read hoisting

Make sure that when analysing a `vector.transfer_read` that's a
candidate for hoisting, the users of relevant `memref.collapse_shape`
Op are included in the alias analysis.
---
 .../Dialect/Linalg/Transforms/Hoisting.cpp    |  4 +++
 mlir/test/Dialect/Linalg/hoisting.mlir        | 36 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 5f20ea42e499241..7dae1caa48b9077 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -69,6 +69,10 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
       users.append(subView->getUsers().begin(), subView->getUsers().end());
       continue;
     }
+    if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
+      users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
+      continue;
+    }
     if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
       continue;
     if (!loop->isAncestor(user))
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index 5b1bb3fc15e09ea..6052775c7e8dea7 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -756,3 +756,39 @@ transform.sequence failures(propagate) {
   transform.structured.hoist_redundant_vector_transfers %0
     : (!transform.any_op) -> !transform.any_op
 }
+
+// -----
+
+// The users of `memref.collapse_shape %alloca` below write to `%alloca`, which
+// means that hoisting `vector.transfer_read %alloca` further down is not safe.
+
+// CHECK-LABEL:  func.func @collapse_shape
+//       CHECK:    scf.for {{.*}} {
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_read
+//       CHECK:      vector.transfer_write
+//       CHECK:    }
+
+func.func @collapse_shape(%2: memref<1x20x1xi32>, %0:  memref<1x28x1xi32>, %1: memref<9x1xi32>, %idx1: index, %idx2: index, %vec: vector<4xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c20 = arith.constant 20 : index
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
+  scf.for %arg0 = %c0 to %c20 step %c4 {
+    %subview = memref.subview %2[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+    %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
+    vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
+    %37 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
+    vector.transfer_write %37, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["func.func"]} in %arg1
+    : (!transform.any_op) -> !transform.any_op
+  transform.structured.hoist_redundant_vector_transfers %0
+    : (!transform.any_op) -> !transform.any_op
+}

>From 6dc1616db250af5f8ab883262e23d96c20583f37 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 8 Sep 2023 18:35:17 +0000
Subject: [PATCH 2/3] Fix more cases

Added 2 more cases where `memref.collapse_shape` was ignored in alias
analysis.
---
 .../Dialect/Linalg/Transforms/Hoisting.cpp    |  2 +
 .../Transforms/VectorTransferOpTransforms.cpp |  4 ++
 mlir/test/Dialect/Linalg/hoisting.mlir        | 33 ++++++++++++++++
 .../Dialect/Vector/vector-transferop-opt.mlir | 39 ++++++++++++++++++-
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 7dae1caa48b9077..6f5e0e54509caba 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -57,6 +57,8 @@ static bool noAliasingUseInLoop(vector::TransferReadOp transferRead,
   Value source = transferRead.getSource();
   while (auto subView = source.getDefiningOp<memref::SubViewOp>())
     source = subView.getSource();
+  while (auto collapsed = source.getDefiningOp<memref::CollapseShapeOp>())
+    source = collapsed->getOperand(0);
   llvm::SmallVector<Operation *, 32> users(source.getUsers().begin(),
                                            source.getUsers().end());
   llvm::SmallDenseSet<Operation *, 32> processed;
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
index 74d4b7636315fd5..f715c543eb17955 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
@@ -206,6 +206,10 @@ void TransferOptimization::storeToLoadForwarding(vector::TransferReadOp read) {
       users.append(subView->getUsers().begin(), subView->getUsers().end());
       continue;
     }
+    if (auto collapsed = dyn_cast<memref::CollapseShapeOp>(user)) {
+      users.append(collapsed->getUsers().begin(), collapsed->getUsers().end());
+      continue;
+    }
     if (isMemoryEffectFree(user) || isa<vector::TransferReadOp>(user))
       continue;
     if (auto write = dyn_cast<vector::TransferWriteOp>(user)) {
diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index 6052775c7e8dea7..7c3a4416448dfb8 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -792,3 +792,36 @@ transform.sequence failures(propagate) {
   transform.structured.hoist_redundant_vector_transfers %0
     : (!transform.any_op) -> !transform.any_op
 }
+
+// -----
+
+// Make sure that values originating from `memref.collapse_shape` are taken
+// into account in alias analysis.
+
+// CHECK-LABEL:  func.func @collapse_shape_2
+//       CHECK:    scf.for {{.*}} {
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_read
+
+func.func @collapse_shape_2(%vec: vector<1x12x1xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c20 = arith.constant 20 : index
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x12x1xi32>
+  scf.for %arg0 = %c0 to %c20 step %c4 {
+    %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
+    vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
+    %8 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
+    "prevent.dce"(%8) : (vector<12xi32>) ->()
+  }
+  return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["func.func"]} in %arg1
+    : (!transform.any_op) -> !transform.any_op
+  transform.structured.hoist_redundant_vector_transfers %0
+    : (!transform.any_op) -> !transform.any_op
+}
diff --git a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
index af4e7d86f62e372..e1a3de174164af1 100644
--- a/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
+++ b/mlir/test/Dialect/Vector/vector-transferop-opt.mlir
@@ -215,4 +215,41 @@ func.func @forward_dead_store_negative(%arg0: i1, %arg1 : memref<4x4xf32>,
   vector.transfer_write %v2, %arg1[%c1, %c0] {in_bounds = [true, true]} :
     vector<1x4xf32>, memref<4x4xf32>
   return %0 : vector<1x4xf32>
-}
\ No newline at end of file
+}
+
+
+// In the example below, forwarding of %1 is not safe:
+//     %1 = vector.transfer_read %subview
+//     vector.transfer_write %1, %alloca
+// (*) vector.transfer_write %vec, %collapse_shape 
+//     vector.transfer_write %1, %subview
+// That's because the following %2:
+//    %2 = vector.transfer_read %a
+// reads from memory poplulated by the 2nd vector.transfer_write above (*)
+
+// CHECK-LABEL:  func.func @collapse_shape
+//       CHECK:    scf.for {{.*}} {
+//       CHECK:      vector.transfer_read
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_write
+//       CHECK:      vector.transfer_read
+//       CHECK:      vector.transfer_write
+
+func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %vec: vector<4xi32>) {
+  %c0_i32 = arith.constant 0 : i32
+  %c0 = arith.constant 0 : index
+  %c4 = arith.constant 4 : index
+  %c20 = arith.constant 20 : index
+
+  %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
+  %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
+  scf.for %arg0 = %c0 to %c20 step %c4 {
+    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+    %1 = vector.transfer_read %subview[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>, vector<1x4x1xi32>
+    vector.transfer_write %1, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32>
+    vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
+    %2 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
+    vector.transfer_write %2, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+  }
+  return
+}

>From 301a9201898c7024ac7c5449435bf96b8bd81daa Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 8 Sep 2023 19:07:28 +0000
Subject: [PATCH 3/3] Refine tests

---
 mlir/test/Dialect/Linalg/hoisting.mlir | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir
index 7c3a4416448dfb8..88081ae5623dd4d 100644
--- a/mlir/test/Dialect/Linalg/hoisting.mlir
+++ b/mlir/test/Dialect/Linalg/hoisting.mlir
@@ -759,6 +759,9 @@ transform.sequence failures(propagate) {
 
 // -----
 
+// Make sure that values originating from `memref.collapse_shape` are taken
+// into account in alias analysis.
+
 // The users of `memref.collapse_shape %alloca` below write to `%alloca`, which
 // means that hoisting `vector.transfer_read %alloca` further down is not safe.
 
@@ -769,18 +772,18 @@ transform.sequence failures(propagate) {
 //       CHECK:      vector.transfer_write
 //       CHECK:    }
 
-func.func @collapse_shape(%2: memref<1x20x1xi32>, %0:  memref<1x28x1xi32>, %1: memref<9x1xi32>, %idx1: index, %idx2: index, %vec: vector<4xi32>) {
+func.func @collapse_shape(%in_0: memref<1x20x1xi32>, %1: memref<9x1xi32>, %vec: vector<4xi32>) {
   %c0_i32 = arith.constant 0 : i32
   %c0 = arith.constant 0 : index
   %c4 = arith.constant 4 : index
   %c20 = arith.constant 20 : index
   %alloca = memref.alloca() {alignment = 64 : i64} : memref<1x4x1xi32>
   scf.for %arg0 = %c0 to %c20 step %c4 {
-    %subview = memref.subview %2[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+    %subview = memref.subview %in_0[0, %arg0, 0] [1, 4, 1] [1, 1, 1] : memref<1x20x1xi32> to memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
     %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x4x1xi32> into memref<4xi32>
     vector.transfer_write %vec, %collapse_shape[%c0] {in_bounds = [true]} : vector<4xi32>, memref<4xi32>
-    %37 = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
-    vector.transfer_write %37, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
+    %read = vector.transfer_read %alloca[%c0, %c0, %c0], %c0_i32 {in_bounds = [true, true, true]} : memref<1x4x1xi32>, vector<1x4x1xi32>
+    vector.transfer_write %read, %subview[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x4x1xi32>, memref<1x4x1xi32, strided<[20, 1, 1], offset: ?>>
   }
   return
 }
@@ -798,6 +801,10 @@ transform.sequence failures(propagate) {
 // Make sure that values originating from `memref.collapse_shape` are taken
 // into account in alias analysis.
 
+// `vector.transfer_read` below reads from the result of
+// `memref.collapse_shape`. As it's the same memref as the one that the
+// preceeding `vector.transfer_write` writes to, hositing %8 is not safe.
+
 // CHECK-LABEL:  func.func @collapse_shape_2
 //       CHECK:    scf.for {{.*}} {
 //       CHECK:      vector.transfer_write
@@ -812,8 +819,8 @@ func.func @collapse_shape_2(%vec: vector<1x12x1xi32>) {
   scf.for %arg0 = %c0 to %c20 step %c4 {
     %collapse_shape = memref.collapse_shape %alloca [[0, 1, 2]] : memref<1x12x1xi32> into memref<12xi32>
     vector.transfer_write %vec, %alloca[%c0, %c0, %c0] {in_bounds = [true, true, true]} : vector<1x12x1xi32>, memref<1x12x1xi32>
-    %8 = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
-    "prevent.dce"(%8) : (vector<12xi32>) ->()
+    %read = vector.transfer_read %collapse_shape[%c0], %c0_i32 {in_bounds = [true]} : memref<12xi32>, vector<12xi32>
+    "prevent.dce"(%read) : (vector<12xi32>) ->()
   }
   return
 }