[Mlir-commits] [mlir] [mlir][vector] Update tests for collapse 5/n (nfc) (PR #96227)

Andrzej Warzyński llvmlistbot at llvm.org
Fri Jul 12 01:26:18 PDT 2024


https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/96227

From 6f6d0c583ececcf0a17254d0c4f58dc237c2a2ca Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 20 Jun 2024 14:34:30 +0100
Subject: [PATCH 1/6] [mlir][vector] Restrict
 DropInnerMostUnitDimsTransferWrite

Restrict `DropInnerMostUnitDimsTransferWrite` so that it fails when one
of the indices to be dropped could be != 0, e.g.

```mlir
func.func @negative_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index,
    %idx_2: index) {

  %c0 = arith.constant 0 : index
  vector.transfer_write %arg1, %arg0[%idx_1, %idx_2] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
  return
}
```

This is an edge case that could represent an out-of-bounds access,
though that will depend on the actual value of `%idx_2`. Importantly,
_without this change_ it would be transformed as follows:
```mlir
func.func @negative_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index,
    %idx_2: index) {

  %subview = memref.subview %arg0[0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
  %0 = vector.shape_cast %arg1 : vector<8x1xf32> to vector<8xf32>
  vector.transfer_write %0, %subview[%idx_1] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
  return
}
```

This is incorrect - `%idx_2` is ignored. Hence the extra restriction to
avoid such cases.
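
For contrast, when the index to be dropped is a known zero the collapse is
sound. A sketch (a hypothetical `@positive_example`, mirroring the new test
added in this patch):

```mlir
func.func @positive_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index) {

  %c0 = arith.constant 0 : index
  // The trailing index is a constant 0, so dropping the unit dim cannot
  // change which elements are written.
  vector.transfer_write %arg1, %arg0[%idx_1, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
  return
}
```

Here only the unit dim and its zero index are dropped; `%idx_1` is preserved
by the rewritten 1-D write.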

NOTE: This PR is limited to `vector.transfer_write`. Similar patch for
`vector.transfer_read`: #94904
---
 .../Vector/Transforms/VectorTransforms.cpp    |  5 ++++
 ...tor-transfer-collapse-inner-most-dims.mlir | 27 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index da5954b70a2ec..045bc8142cea0 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1394,6 +1394,11 @@ class DropInnerMostUnitDimsTransferWrite
     if (dimsToDrop == 0)
       return failure();
 
+    // Make sure that the indices to be dropped are equal to 0.
+    // TODO: Deal with cases when the indices are not 0.
+    if (!llvm::all_of(writeOp.getIndices().take_back(dimsToDrop), isZeroIndex))
+      return failure();
+
     auto resultTargetVecType =
         VectorType::get(targetType.getShape().drop_back(dimsToDrop),
                         targetType.getElementType(),
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index bd6845d1c7cda..ce66a9f6a5ff0 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -367,6 +367,33 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b:
 
 // -----
 
+func.func @contiguous_inner_most_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idxs(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<8x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) {
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+
+// The index to be dropped is != 0 - this is currently not supported.
+
+func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idxs
+// CHECK-NOT:     memref.subview
+// CHECK-NOT:     memref.shape_cast
+// CHECK:         vector.transfer_write
+
+// -----
+
 func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]

From 72505d0cb6bd67e216b9c78e2605ad86a1c3674d Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 21 Jun 2024 16:35:59 +0100
Subject: [PATCH 2/6] fixup! [mlir][vector] Restrict
 DropInnerMostUnitDimsTransferWrite

Remove duplicate test
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 31 -------------------
 1 file changed, 31 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index ce66a9f6a5ff0..6fb3cf5ca547d 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -113,19 +113,6 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b:
 
 // -----
 
-func.func @contiguous_inner_most_dim_non_zero_idx(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
-  %c0 = arith.constant 0 : index
-  %f0 = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %c0], %f0 : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
-}
-//      CHECK: func @contiguous_inner_most_dim_non_zero_idx(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index) -> vector<8x1xf32>
-//      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
-// CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
-//      CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
-//      CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
-//      CHECK:   return %[[RESULT]]
-
 // The index to be dropped is != 0 - this is currently not supported.
 func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %f0 = arith.constant 0.0 : f32
@@ -136,24 +123,6 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>
 // CHECK-NOT:     memref.subview
 // CHECK:         vector.transfer_read
 
-// Same as the top example within this split, but with the outer vector
-// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
-
-func.func @contiguous_inner_most_dim_non_zero_idx_scalable_inner_dim(%A: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) {
-  %c0 = arith.constant 0 : index
-  %f0 = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %c0], %f0 : memref<16x1xf32>, vector<[8]x1xf32>
-  return %1 : vector<[8]x1xf32>
-}
-// CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_scalable_inner_dim(
-// CHECK-SAME:    %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index) -> vector<[8]x1xf32>
-//       CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
-//  CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
-//       CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
-//       CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<[8]xf32> to vector<[8]x1xf32>
-//       CHECK:   return %[[RESULT]]
-
 // -----
 
 func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {

From 6a3fe47fbdefcde96d76528530e79774c28534bb Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 11 Jul 2024 10:03:16 +0100
Subject: [PATCH 3/6] fixup! fixup! [mlir][vector] Restrict
 DropInnerMostUnitDimsTransferWrite

Allow non-zero indices when in-bounds
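
The three cases, sketched in IR with placeholder names (`%mem`, `%vec`, `%i`,
`%c0`; see the comment in the diff below):

```mlir
// Case 1: the dropped unit dim is "in bounds", so its index - even when
// dynamic - must be 0. Safe to collapse.
vector.transfer_write %vec, %mem[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
// Case 2: marked "out of bounds", but the index is a constant 0, so the
// access is effectively in bounds. Safe to collapse.
vector.transfer_write %vec, %mem[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
// Case 3: marked "out of bounds" with a dynamic index. Bail out.
vector.transfer_write %vec, %mem[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
```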
---
 .../Vector/Transforms/VectorTransforms.cpp    | 28 +++++++++++--
 ...tor-transfer-collapse-inner-most-dims.mlir | 42 ++++++++++++++++---
 2 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 045bc8142cea0..5e139648d25e7 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1394,9 +1394,31 @@ class DropInnerMostUnitDimsTransferWrite
     if (dimsToDrop == 0)
       return failure();
 
-    // Make sure that the indices to be dropped are equal to 0.
-    // TODO: Deal with cases when the indices are not 0.
-    if (!llvm::all_of(writeOp.getIndices().take_back(dimsToDrop), isZeroIndex))
+    // We need to consider 3 cases for the dim to drop:
+    //  1. If "in bounds", it can safely be assumed that the corresponding
+    //     index is equal to 0 (safe to collapse) (*)
+    //  2. If "out of bounds" and the corresponding index is 0, it is
+    //     effectively "in bounds" (safe to collapse)
+    //  3. If "out of bounds" and the corresponding index is != 0,
+    //     be conservative and bail out (not safe to collapse)
+    // (*) This pattern only drops unit dims, so the only possible "in bounds"
+    // index is "0". This could be added as a folder.
+    // TODO: Deal with 3. by e.g. propagating the "out of bounds" flag to other
+    // dims.
+    bool indexOutOfBounds = true;
+    if (writeOp.getInBounds())
+      indexOutOfBounds = llvm::any_of(
+          llvm::zip(writeOp.getInBounds()->getValue().take_back(dimsToDrop),
+                    writeOp.getIndices().take_back(dimsToDrop)),
+          [](auto zipped) {
+            auto inBounds = cast<BoolAttr>(std::get<0>(zipped)).getValue();
+            auto nonZeroIdx = !isZeroIndex(std::get<1>(zipped));
+            return !inBounds && nonZeroIdx;
+          });
+    else
+      indexOutOfBounds = !llvm::all_of(
+          writeOp.getIndices().take_back(dimsToDrop), isZeroIndex);
+    if (indexOutOfBounds)
       return failure();
 
     auto resultTargetVecType =
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 6fb3cf5ca547d..2771d05072a12 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -336,12 +336,17 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b:
 
 // -----
 
-func.func @contiguous_inner_most_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+// Test the impact of changing the in_bounds attribute. The behaviour will
+// depend on whether the index is == 0 or != 0.
+
+// The index to be dropped is == 0, so it's safe to collapse. The other index
+// should be preserved correctly.
+func.func @contiguous_inner_most_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
-// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idxs(
+// CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_in_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
 // CHECK-SAME:      %[[VEC:.*]]: vector<8x1xf32>,
 // CHECK-SAME:      %[[IDX:.*]]: index) {
@@ -349,14 +354,39 @@ func.func @contiguous_inner_most_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: v
 // CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
 // CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
 
-// The index to be dropped is != 0 - this is currently not supported.
-
-func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_out_of_bounds
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<8x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) {
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+
+// The index to be dropped is unknown, but since it's "in bounds", it has to be
+// == 0. It's safe to collapse the corresponding dim.
+
+func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
   vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
-// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idxs
+// CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_in_bounds
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<8x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) {
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+
+func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds
 // CHECK-NOT:     memref.subview
 // CHECK-NOT:     memref.shape_cast
 // CHECK:         vector.transfer_write

From e2a35ec137f20872bc31699454089efe6d6fd668 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 11 Jul 2024 12:16:39 +0100
Subject: [PATCH 4/6] fixup! fixup! fixup! [mlir][vector] Restrict
 DropInnerMostUnitDimsTransferWrite

Simplify as per Ben's suggestion. Given that the Op folder is guaranteed to run before the pattern, we can safely assume that the in_bounds attribute already contains all the info that we need.
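
A sketch of the folding being relied on (placeholder names; a constant-zero
index into a unit dim can never be out of bounds, so the folder tightens the
attribute before this pattern runs):

```mlir
// Before folding: the trailing unit dim is conservatively marked
// "out of bounds", even though its index is a known 0.
vector.transfer_write %vec, %mem[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
// After folding: the flag reflects that the access is in bounds.
vector.transfer_write %vec, %mem[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
```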
---
 .../Vector/Transforms/VectorTransforms.cpp    | 28 ++-----------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 5e139648d25e7..e1fbb78a68caf 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1394,31 +1394,9 @@ class DropInnerMostUnitDimsTransferWrite
     if (dimsToDrop == 0)
       return failure();
 
-    // We need to consider 3 cases for the dim to drop:
-    //  1. If "in bounds", it can safely be assumed that the corresponding
-    //     index is equal to 0 (safe to collapse) (*)
-    //  2. If "out of bounds" and the corresponding index is 0, it is
-    //     effectively "in bounds" (safe to collapse)
-    //  3. If "out of bounds" and the corresponding index is != 0,
-    //     be conservative and bail out (not safe to collapse)
-    // (*) This pattern only drops unit dims, so the only possible "in bounds"
-    // index is "0". This could be added as a folder.
-    // TODO: Deal with 3. by e.g. propagating the "out of bounds" flag to other
-    // dims.
-    bool indexOutOfBounds = true;
-    if (writeOp.getInBounds())
-      indexOutOfBounds = llvm::any_of(
-          llvm::zip(writeOp.getInBounds()->getValue().take_back(dimsToDrop),
-                    writeOp.getIndices().take_back(dimsToDrop)),
-          [](auto zipped) {
-            auto inBounds = cast<BoolAttr>(std::get<0>(zipped)).getValue();
-            auto nonZeroIdx = !isZeroIndex(std::get<1>(zipped));
-            return !inBounds && nonZeroIdx;
-          });
-    else
-      indexOutOfBounds = !llvm::all_of(
-          writeOp.getIndices().take_back(dimsToDrop), isZeroIndex);
-    if (indexOutOfBounds)
+    auto inBounds = writeOp.getInBoundsValues();
+    auto droppedInBounds = ArrayRef<bool>(inBounds).take_back(dimsToDrop);
+    if (llvm::is_contained(droppedInBounds, false))
       return failure();
 
     auto resultTargetVecType =

From 2342b2f1fd639038fd823fc184ee013c9d2e0346 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 11 Jul 2024 15:54:51 +0100
Subject: [PATCH 5/6] fixup! fixup! fixup! [mlir][vector] Restrict
 DropInnerMostUnitDimsTransferWrite

Extend to xfer_read
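
The read side now uses the same in-bounds-based check; a sketch of the two
extremes (names as in the updated tests):

```mlir
// Collapsible: the dropped unit dim is "in bounds", hence its index must be 0.
%0 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
// Not collapsible: "out of bounds" with a dynamic index.
%1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
```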
---
 .../Vector/Transforms/VectorTransforms.cpp    |  6 +-
 ...tor-transfer-collapse-inner-most-dims.mlir | 70 +++++++++++++++++--
 2 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index e1fbb78a68caf..b69e9421384b0 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1300,9 +1300,9 @@ class DropInnerMostUnitDimsTransferRead
     if (dimsToDrop == 0)
       return failure();
 
-    // Make sure that the indices to be dropped are equal 0.
-    // TODO: Deal with cases when the indices are not 0.
-    if (!llvm::all_of(readOp.getIndices().take_back(dimsToDrop), isZeroIndex))
+    auto inBounds = readOp.getInBoundsValues();
+    auto droppedInBounds = ArrayRef<bool>(inBounds).take_back(dimsToDrop);
+    if (llvm::is_contained(droppedInBounds, false))
       return failure();
 
     auto resultTargetVecType =
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 2771d05072a12..3c0414c83ed68 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -113,16 +113,70 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b:
 
 // -----
 
-// The index to be dropped is != 0 - this is currently not supported.
-func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
-  %f0 = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %f0 : memref<16x1xf32>, vector<8x1xf32>
+// Test the impact of changing the in_bounds attribute. The behaviour will
+// depend on whether the index is == 0 or != 0.
+
+// The index to be dropped is == 0, so it's safe to collapse. The other index
+// should be preserved correctly.
+func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %c0 = arith.constant 0 : index
+  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %1 : vector<8x1xf32>
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_in_bounds(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) -> vector<8x1xf32> {
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
+// CHECK:           vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
+
+// The index to be dropped is == 0, so it's safe to collapse. The "out of
+// bounds" attribute is too conservative and will be folded to "in bounds"
+// before the pattern runs. The other index should be preserved correctly.
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %c0 = arith.constant 0 : index
+  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %1 : vector<8x1xf32>
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_out_of_bounds(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) -> vector<8x1xf32> {
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
+// CHECK:           vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
+
+// The index to be dropped is unknown, but since it's "in bounds", it has to be
+// == 0. It's safe to collapse the corresponding dim.
+func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %1 : vector<8x1xf32>
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) -> vector<8x1xf32> {
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
+// CHECK:           vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
+
+// The index to be dropped is unknown and "out of bounds" - not safe to
+// collapse.
+func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
   return %1 : vector<8x1xf32>
 }
-// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idxs
+// CHECK-LABEL:   func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(
 // CHECK-NOT:     memref.subview
+// CHECK-NOT:     memref.shape_cast
 // CHECK:         vector.transfer_read
 
+
 // -----
 
 func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
@@ -354,6 +408,9 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %ar
 // CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
 // CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
 
+// The index to be dropped is == 0, so it's safe to collapse. The "out of
+// bounds" attribute is too conservative and will be folded to "in bounds"
+// before the pattern runs. The other index should be preserved correctly.
 func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
@@ -369,7 +426,6 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>,
 
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
-
 func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
   vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
@@ -382,6 +438,8 @@ func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf
 // CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
 // CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
 
+// The index to be dropped is unknown and "out of bounds" - not safe to
+// collapse.
 func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
   vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
   return

From 80464e1424a138be2a9ec66f8f4bd778bce49246 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 20 Jun 2024 19:14:23 +0100
Subject: [PATCH 6/6] [mlir][vector] Update tests for collapse 5/n (nfc)

The main goal of this PR (and subsequent PRs) is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There are quite a few cases to consider, hence this is split into multiple
PRs. In this PR, I am simply adding more tests for
`vector.transfer_write` so that for every test for `xfer_read`, there's
a corresponding test for `xfer_write`.

This is a follow-up for: #94490, #94604, #94906, #96214
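
For example, the pre-existing read test `@contiguous_inner_most_dim_with_subview`
gains a write counterpart; schematically (the read form is paraphrased, not
quoted from the test file):

```mlir
// Read form (pre-existing, sketched):
%1 = vector.transfer_read %0[%ii, %c0], %cst : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
// Write counterpart (added in this patch):
vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
```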
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 3c0414c83ed68..981c488c90283 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -449,6 +449,101 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0:
 // CHECK-NOT:     memref.shape_cast
 // CHECK:         vector.transfer_write
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_non_zero_idxs_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idxs_scalable(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[8]x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) {
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
+
+// -----
+
+func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  return
+}
+
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview(
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<4x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1xf32> to vector<4xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_scalable(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  return
+}
+
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_scalable
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[4]x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// -----
+
+func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<4x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_2d(
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<4x1x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1x1xf32> to vector<4xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_2d_scalable
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[4]x1x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1x1xf32> to vector<[4]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
 // -----
 
 func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
@@ -471,6 +566,30 @@ func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16,
 
 // -----
 
+// NOTE: This is an out-of-bounds access.
+
+func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+  return
+}
+//      CHECK: func.func @negative_non_unit_inner_vec_dim
+//  CHECK-NOT:   memref.subview
+//      CHECK:   vector.transfer_write
+
+// -----
+
+func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+  return
+}
+//      CHECK: func.func @negative_non_unit_inner_memref_dim
+//  CHECK-NOT:   memref.subview
+//      CHECK:   vector.transfer_write
+
+// -----
+
 func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]


