[Mlir-commits] [mlir] andrzej/update collapse inner 4 (PR #94906)

Sun Jun 9 10:17:08 PDT 2024

https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/94906

- **[mlir][vector] Update tests for collapse 1/n (nfc)**
- **fixup! [mlir][vector] Update tests for collapse 1/n (nfc)**
- **[mlir][vector] Update tests for collapse 2/n (nfc)**
- **[mlir][vector] Restrict `DropInnerMostUnitDimsTransferRead`**
- **[mlir][vector] Update tests for collapse 3/n (nfc)**


>From 48c614fb8ece5a45b998c9b4206aeb80ec5d2d8b Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 5 Jun 2024 16:39:15 +0100
Subject: [PATCH 1/5] [mlir][vector] Update tests for collapse 1/n (nfc)

The main goal of this PR (and subsequent PRs), is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There's quite a few cases to consider, hence this is split into multiple
PRs. In this PR, the very first test is complemented with all the possible
combinations:
  * scalable (rather than fixed) unit trailing dim,
  * dynamic (rather than static) trailing dim in the source memref.

Also,
  * @leading_scalable_dimension_transfer_read and
    @trailing_scalable_one_dim_transfer_read,

are replaced with:
  * @contiguous_inner_most_scalable_inner_dim and
    @negative_scalable_unit_dim,

respectively, and added to the list above (i.e. alongside other
variations for the very first test).

In addition:
  * "_view" is removed from function names (it's not clear to me what it
    was meant to signify)
  * extra comments are added to separate tests for vector.transfer_read
    and vector.transfer_write

NOTE: This PR is limited to tests for `vector.transfer_read`.
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 91 ++++++++++++-------
 1 file changed, 59 insertions(+), 32 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index b4cb640108bae..0123709004bd6 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,12 +1,17 @@
 // RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
 
-func.func @contiguous_inner_most_view(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+//-----------------------------------------------------------------------------
+// 1. vector.transfer_read
+//-----------------------------------------------------------------------------
+
+func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
   return %0 : vector<1x8x1xf32>
 }
-//      CHECK: func @contiguous_inner_most_view(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
+
+//      CHECK: func @contiguous_inner_most(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK-SAME:    memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>> to memref<1x1x8xf32, strided<[3072, 8, 1], offset: ?>>
 //      CHECK:   %[[VEC:.+]] = vector.transfer_read %[[SRC_0]]
@@ -14,15 +19,61 @@ func.func @contiguous_inner_most_view(%in: memref<1x1x8x1xf32, strided<[3072, 8,
 //      CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
 //      CHECK:   return %[[RESULT]]
 
+// Same as the top example within this split, but with the inner vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
+  return %0 : vector<1x[8]x1xf32>
+}
+
+//      CHECK: func @contiguous_inner_most_scalable_inner_dim(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
+//      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
+// CHECK-SAME:    memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>> to memref<1x1x8xf32, strided<[3072, 8, 1], offset: ?>>
+//      CHECK:   %[[VEC:.+]] = vector.transfer_read %[[SRC_0]]
+// CHECK-SAME:    memref<1x1x8xf32, strided<[3072, 8, 1], offset: ?>>, vector<1x[8]xf32>
+//      CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
+//      CHECK:   return %[[RESULT]]
+
+// Same as the top example within this split, but the trailing unit dim was
+// replaced with a dyn dim - not supported
+
+func.func @non_unit_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %0 : vector<1x8x1xf32>
+}
+
+//  CHECK-LABEL: func @non_unit_trailing_dim
+//    CHECK-NOT: memref.subview
+//    CHECK-NOT: vector.shape_cast
+
+// Same as the top example within this split, but with a scalable unit dim in
+// the output vector - not supported
+
+func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
+  return %0 : vector<1x8x[1]xf32>
+}
+//  CHECK-LABEL: func @scalable_unit_dim
+//    CHECK-NOT: memref.subview
+//    CHECK-NOT: vector.shape_cast
+
 // -----
 
-func.func @contiguous_outer_dyn_inner_most_view(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
+func.func @contiguous_outer_dyn_inner_most(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
   %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
-// CHECK: func.func @contiguous_outer_dyn_inner_most_view(
+// CHECK: func.func @contiguous_outer_dyn_inner_most(
 // CHECK-SAME:   %[[IDX0:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX1:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[SRC:[a-zA-Z0-9]+]]
@@ -103,6 +154,10 @@ func.func @contiguous_inner_most_dim_out_of_bounds_2d(%arg0: memref<1x1xf32>) ->
 
 // -----
 
+//-----------------------------------------------------------------------------
+// 2. vector.transfer_write
+//-----------------------------------------------------------------------------
+
 func.func @drop_two_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
@@ -177,21 +232,6 @@ func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], o
 
 // -----
 
-func.func @leading_scalable_dimension_transfer_read(%dest : memref<24x1xf32>) -> vector<[4]x1xf32> {
-  %c0 = arith.constant 0 : index
-  %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %dest[%c0, %c0], %pad {in_bounds = [true, true]} : memref<24x1xf32>, vector<[4]x1xf32>
-  return %0 : vector<[4]x1xf32>
-}
-// CHECK:      func.func @leading_scalable_dimension_transfer_read
-// CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
-// CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0] [24, 1] [1, 1] : memref<24x1xf32> to memref<24xf32, strided<[1]>>
-// CHECK:        %[[READ:.+]] = vector.transfer_read %[[SUBVIEW]]{{.*}} {in_bounds = [true]} : memref<24xf32, strided<[1]>>, vector<[4]xf32>
-// CHECK:        %[[CAST:.+]] = vector.shape_cast %[[READ]] : vector<[4]xf32> to vector<[4]x1xf32>
-// CHECK:        return %[[CAST]]
-
-// -----
-
 // Negative test: [1] (scalable 1) is _not_ a unit dimension.
 func.func @trailing_scalable_one_dim_transfer_read(%dest : memref<24x1xf32>) -> vector<4x[1]xf32> {
   %c0 = arith.constant 0 : index
@@ -217,16 +257,3 @@ func.func @leading_scalable_dimension_transfer_write(%dest : memref<24x1xf32>, %
 // CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0] [24, 1] [1, 1] : memref<24x1xf32> to memref<24xf32, strided<[1]>>
 // CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
 // CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]{{.*}} {in_bounds = [true]} : vector<[4]xf32>, memref<24xf32, strided<[1]>>
-
-// -----
-
-// Negative test: [1] (scalable 1) is _not_ a unit dimension.
-func.func @trailing_scalable_one_dim_transfer_write(%dest : memref<24x1xf32>, %vec: vector<4x[1]xf32>, %index: index) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %dest[%index, %c0] {in_bounds = [true, true]} : vector<4x[1]xf32>,  memref<24x1xf32>
-  return
-}
-// CHECK:      func.func @trailing_scalable_one_dim_transfer_write
-// CHECK-NOT:    vector.shape_cast
-// CHECK:        vector.transfer_write {{.*}} : vector<4x[1]xf32>,  memref<24x1xf32>
-// CHECK-NOT:    vector.shape_cast

>From e9731a0a4ec6e26148c47895ed64a097e7090b3c Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 6 Jun 2024 18:07:09 +0100
Subject: [PATCH 2/5] fixup! [mlir][vector] Update tests for collapse 1/n (nfc)

Fix failing test
---
 .../vector-transfer-collapse-inner-most-dims.mlir | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 0123709004bd6..9b23681dba6a8 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -61,7 +61,7 @@ func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8,
   %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
   return %0 : vector<1x8x[1]xf32>
 }
-//  CHECK-LABEL: func @scalable_unit_dim
+//  CHECK-LABEL: func @negative_scalable_unit_dim
 //    CHECK-NOT: memref.subview
 //    CHECK-NOT: vector.shape_cast
 
@@ -257,3 +257,16 @@ func.func @leading_scalable_dimension_transfer_write(%dest : memref<24x1xf32>, %
 // CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0] [24, 1] [1, 1] : memref<24x1xf32> to memref<24xf32, strided<[1]>>
 // CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
 // CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]{{.*}} {in_bounds = [true]} : vector<[4]xf32>, memref<24xf32, strided<[1]>>
+
+// -----
+
+// Negative test: [1] (scalable 1) is _not_ a unit dimension.
+func.func @trailing_scalable_one_dim_transfer_write(%dest : memref<24x1xf32>, %vec: vector<4x[1]xf32>, %index: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %dest[%index, %c0] {in_bounds = [true, true]} : vector<4x[1]xf32>,  memref<24x1xf32>
+  return
+}
+// CHECK:      func.func @trailing_scalable_one_dim_transfer_write
+// CHECK-NOT:    vector.shape_cast
+// CHECK:        vector.transfer_write {{.*}} : vector<4x[1]xf32>,  memref<24x1xf32>
+// CHECK-NOT:    vector.shape_cast

>From f246e3a18d978f78f88f79c1fea1bac4b79103c8 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 6 Jun 2024 11:21:45 +0100
Subject: [PATCH 3/5] [mlir][vector] Update tests for collapse 2/n (nfc)

The main goal of this PR (and subsequent PRs), is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

Changes in this PR:

1. Renamed `@contiguous_inner_most_dim_bounds` as
   `@contiguous_inner_most_dim_with_subview`. This test was introduced
   to make sure that the `in_bounds` attribute is correctly preserved,
   but that's already verified by some earlier tests. The updated name
   highlights the differentiating factor of this test when compared to
   the other tests _currently_ present in the file, i.e. the presence of
   `memref.subview` in the input IR.

2. Renamed `@contiguous_inner_most_dim_out_of_bounds_2d` as
   `@negative_non_unit_inner_vec_dim`. While this test does contain an
   out-of-bounds access, the actual reason for the tested pattern to
   fail is the fact that the inner dim in the output vector is not "1".
   A complimentary test was added to verify that the pattern also fails
   when the source memref has non-unit trailing dim
   (`@negative_non_unit_inner_memref_dim`).

3. Renamed `@contiguous_inner_most_dim` as
   `@contiguous_inner_most_dim_non_zero_idxs` - this test verifies that
   the pattern works in the presence of non-zero idxs.

4. Added more tests for scalable vectors - this should cover all cases
   for `vector.transfer_read`.

NOTE: This PR is limited to tests for `vector.transfer_read`.

Follow-up for: #94490
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 136 +++++++++++++-----
 1 file changed, 104 insertions(+), 32 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 9b23681dba6a8..a50c01898c62e 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -53,7 +53,7 @@ func.func @non_unit_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1
 //    CHECK-NOT: vector.shape_cast
 
 // Same as the top example within this split, but with a scalable unit dim in
-// the output vector - not supported
+// the output vector - not supported (scalable 1 is _not_ a unit dimension).
 
 func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
   %c0 = arith.constant 0 : index
@@ -67,13 +67,13 @@ func.func @negative_scalable_unit_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8,
 
 // -----
 
-func.func @contiguous_outer_dyn_inner_most(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
+func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
   %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
-// CHECK: func.func @contiguous_outer_dyn_inner_most(
+// CHECK: func.func @contiguous_inner_most_dynamic_outer
 // CHECK-SAME:   %[[IDX0:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX1:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[SRC:[a-zA-Z0-9]+]]
@@ -89,68 +89,154 @@ func.func @contiguous_outer_dyn_inner_most(%a: index, %b: index, %memref: memref
 // CHECK:        %[[RESULT:.+]] = vector.shape_cast %[[VEC]]
 // CHECK:        return %[[RESULT]]
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
+  %c0 = arith.constant 0 : index
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
+  return %v : vector<[8]x1xf32>
+}
+// CHECK-LABEL:  func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
+// CHECK-SAME:   %[[IDX0:[a-zA-Z0-9]+]]
+// CHECK-SAME:   %[[IDX1:[a-zA-Z0-9]+]]
+// CHECK-SAME:   %[[SRC:[a-zA-Z0-9]+]]
+// CHECK:         %[[VIEW:.+]] = memref.subview %[[SRC]]{{.*}} memref<?x?x8x1xf32> to memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>
+// CHECK:         %[[VEC_READ:.+]] = vector.transfer_read %[[VIEW]]
+// CHECK-SAME:    {in_bounds = [true]}
+// CHECK-SAME:     memref<?x?x8xf32, strided<[?, 8, 1], offset: ?>>, vector<[8]xf32>
+// CHECK:         vector.shape_cast %[[VEC_READ]]
+
 // -----
 
-func.func @contiguous_inner_most_dim(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0.0 : f32
   %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<8x1xf32>
   return %1 : vector<8x1xf32>
 }
-//      CHECK: func @contiguous_inner_most_dim(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
+//      CHECK: func @contiguous_inner_most_dim_non_zero_idxs(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
 //      CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
-//      CHECK:   %[[RESULT]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
+//      CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
 //      CHECK:   return %[[RESULT]]
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<[8]x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %f0 = arith.constant 0.0 : f32
+  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<[8]x1xf32>
+  return %1 : vector<[8]x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(
+// CHECK-SAME:    %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<[8]x1xf32>
+//       CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
+//  CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
+//       CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
+//       CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<[8]xf32> to vector<[8]x1xf32>
+//       CHECK:   return %[[RESULT]]
+
 // -----
 
-func.func @contiguous_inner_most_dim_bounds(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
   %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
   return %1 : vector<4x1xf32>
 }
-//      CHECK: func @contiguous_inner_most_dim_bounds(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
+//      CHECK: func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
 //      CHECK:   %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
 //      CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_1]]
 // CHECK-SAME:       {in_bounds = [true]}
 // CHECK-SAME:       vector<4xf32>
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
+  return %1 : vector<[4]x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
+//  CHECK-SAME:   %[[SRC:.+]]: memref<1000x1xf32>
+//       CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
+//       CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
+//  CHECK-SAME:       {in_bounds = [true]}
+//  CHECK-SAME:       vector<[4]xf32>
+
 // -----
 
-func.func @contiguous_inner_most_dim_bounds_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
   return %1 : vector<4x1x1xf32>
 }
-//      CHECK: func @contiguous_inner_most_dim_bounds_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
+//      CHECK: func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
 //      CHECK:   %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
 //      CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_1]]
 // CHECK-SAME:       {in_bounds = [true]}
 // CHECK-SAME:       vector<4xf32>
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
+  return %1 : vector<[4]x1x1xf32>
+}
+// CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
+//  CHECK-SAME:   %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
+//       CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
+//       CHECK:   %[[SRC_1:.+]] = memref.subview %[[SRC_0]]
+//       CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_1]]
+//  CHECK-SAME:       {in_bounds = [true]}
+//  CHECK-SAME:       vector<[4]xf32>
+//       CHECK:  vector.shape_cast %[[V]]
+
 // -----
 
-func.func @contiguous_inner_most_dim_out_of_bounds_2d(%arg0: memref<1x1xf32>) -> vector<4x8xf32> {
+// NOTE: This is an out-of-bounds access.
+
+func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8xf32> {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<1x1xf32>, vector<4x8xf32>
+  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x1xf32>, vector<4x8xf32>
   return %0 : vector<4x8xf32>
 }
-// The inner most unit dim can not be dropped. In this context, we do not
-// generate rank-reduced memref.subview ops.
-//      CHECK: func.func @contiguous_inner_most_dim_out_of_bounds_2d
-// CHECK-SAME:   %[[SRC:[a-zA-Z0-9]+]]
+//      CHECK: func.func @negative_non_unit_inner_vec_dim
+//  CHECK-NOT:   memref.subview
+//      CHECK:   vector.transfer_read
+
+// -----
+
+func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<4x1xf32> {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x8xf32>, vector<4x1xf32>
+  return %0 : vector<4x1xf32>
+}
+//      CHECK: func.func @negative_non_unit_inner_memref_dim
 //  CHECK-NOT:   memref.subview
-//      CHECK:   %[[READ:.+]] = vector.transfer_read %[[SRC]]
-//      CHECK:   return %[[READ]] : vector<4x8xf32>
+//      CHECK:   vector.transfer_read
 
 // -----
 
@@ -232,20 +318,6 @@ func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], o
 
 // -----
 
-// Negative test: [1] (scalable 1) is _not_ a unit dimension.
-func.func @trailing_scalable_one_dim_transfer_read(%dest : memref<24x1xf32>) -> vector<4x[1]xf32> {
-  %c0 = arith.constant 0 : index
-  %pad = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %dest[%c0, %c0], %pad {in_bounds = [true, true]} : memref<24x1xf32>, vector<4x[1]xf32>
-  return %0 : vector<4x[1]xf32>
-}
-// CHECK:      func.func @trailing_scalable_one_dim_transfer_read
-// CHECK-NOT:    vector.shape_cast
-// CHECK:        vector.transfer_read {{.*}} : memref<24x1xf32>, vector<4x[1]xf32>
-// CHECK-NOT:    vector.shape_cast
-
-// -----
-
 func.func @leading_scalable_dimension_transfer_write(%dest : memref<24x1xf32>, %vec: vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %vec, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>,  memref<24x1xf32>

>From 3a27b5bb03dfbb42f39c336562d0d6d6e94c0779 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Sun, 9 Jun 2024 16:38:18 +0100
Subject: [PATCH 4/5] [mlir][vector] Restrict
 `DropInnerMostUnitDimsTransferRead`

Restrict `DropInnerMostUnitDimsTransferRead` so that it fails when one
of the indices to be dropped could be != 0, e.g.

```
func.func @negative_example(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
  %f0 = arith.constant 0.0 : f32
  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<8x1xf32>
  return %1 : vector<8x1xf32>
}
```

This is an edge case that could represent an out-of-bounds access,
though that will depend on the actual value of `%j`.

NOTE: This PR is limited to tests for `vector.transfer_read`.

Depends on: #94490, #94604
---
 .../Vector/Transforms/VectorTransforms.cpp    | 15 ++++++++++++
 ...tor-transfer-collapse-inner-most-dims.mlir | 24 +++++++++++++------
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index f29eba90c3ceb..caf1506e0db93 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1293,6 +1293,21 @@ class DropInnerMostUnitDimsTransferRead
     if (dimsToDrop == 0)
       return failure();
 
+    // Make sure that the indixes to be dropped are equal 0.
+    // TODO: Deal with cases when the indices are not 0.
+    auto isZeroIdx = [](Value idx) {
+      Attribute attr;
+      APInt value;
+      if (!matchPattern(idx, m_Constant(&attr)))
+        return false;
+      if (matchPattern(attr, m_ConstantInt(&value)))
+        if (!value.isZero())
+          return false;
+      return true;
+    };
+    if (!llvm::all_of(readOp.getIndices().take_back(dimsToDrop), isZeroIdx))
+      return failure();
+
     auto resultTargetVecType =
         VectorType::get(targetType.getShape().drop_back(dimsToDrop),
                         targetType.getElementType(),
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index a50c01898c62e..bb37d5b45520c 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -111,31 +111,41 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b:
 
 // -----
 
-func.func @contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_dim_non_zero_idx(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<8x1xf32>
+  %1 = vector.transfer_read %A[%i, %c0], %f0 : memref<16x1xf32>, vector<8x1xf32>
   return %1 : vector<8x1xf32>
 }
-//      CHECK: func @contiguous_inner_most_dim_non_zero_idxs(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<8x1xf32>
+//      CHECK: func @contiguous_inner_most_dim_non_zero_idx(%[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index) -> vector<8x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
 // CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
 //      CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]
 //      CHECK:   %[[RESULT:.+]] = vector.shape_cast %[[V]] : vector<8xf32> to vector<8x1xf32>
 //      CHECK:   return %[[RESULT]]
 
+// The index to be dropped is != 0 - this is currently not supported.
+func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+  %f0 = arith.constant 0.0 : f32
+  %1 = vector.transfer_read %A[%i, %i], %f0 : memref<16x1xf32>, vector<8x1xf32>
+  return %1 : vector<8x1xf32>
+}
+// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idxs
+// CHECK-NOT:     memref.subview
+// CHECK:         vector.transfer_read
+
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
 // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(%A: memref<16x1xf32>, %i:index, %j:index) -> (vector<[8]x1xf32>) {
+func.func @contiguous_inner_most_dim_non_zero_idx_scalable_inner_dim(%A: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) {
   %c0 = arith.constant 0 : index
   %f0 = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<16x1xf32>, vector<[8]x1xf32>
+  %1 = vector.transfer_read %A[%i, %c0], %f0 : memref<16x1xf32>, vector<[8]x1xf32>
   return %1 : vector<[8]x1xf32>
 }
-// CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idxs_scalable_inner_dim(
-// CHECK-SAME:    %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index, %[[J:.+]]: index) -> vector<[8]x1xf32>
+// CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_scalable_inner_dim(
+// CHECK-SAME:    %[[SRC:.+]]: memref<16x1xf32>, %[[I:.+]]: index) -> vector<[8]x1xf32>
 //       CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
 //  CHECK-SAME:     memref<16x1xf32> to memref<16xf32, strided<[1]>>
 //       CHECK:   %[[V:.+]] = vector.transfer_read %[[SRC_0]]

>From e1bb6183409ea5c0862b629c607c31d21033eb17 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Sun, 9 Jun 2024 17:54:16 +0100
Subject: [PATCH 5/5] [mlir][vector] Update tests for collapse 3/n (nfc)

The main goal of this PR (and subsequent PRs), is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There's quite a few cases to consider, hence this is split into multiple
PRs. In this PR, the very first test for `vector.transfer_write` is
complemented with all the possible combinations:
  * scalable (rather than fixed) unit trailing dim,
  * dynamic (rather than static) trailing dim in the source memref.

To this end, the following tests:
  * `@leading_scalable_dimension_transfer_write`
    `@trailing_scalable_one_dim_transfer_write`

are replaced with:
  * `@drop_two_inner_most_dim_scalable_inner_dim` and
    `@negative_scalable_unit_dim`,

respectively. In addition:
  * "_for_transfer_write" is removed from function names (to reduce
    noise).

This is a follow-up for: #94490, #94604

NOTE: This PR is limited to tests for `vector.transfer_write`.
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 90 ++++++++++++-------
 1 file changed, 57 insertions(+), 33 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index bb37d5b45520c..4f8aab3729de8 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -254,14 +254,14 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<
 // 2. vector.transfer_write
 //-----------------------------------------------------------------------------
 
-func.func @drop_two_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+func.func @drop_two_inner_most_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x1xf32>
   return
 }
-// CHECK:      func.func @drop_two_inner_most_dim_for_transfer_write
+// CHECK:      func.func @drop_two_inner_most_dim
 // CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
@@ -272,16 +272,67 @@ func.func @drop_two_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1x1
 // CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]
 // CHECK-SAME:     [%[[C0]], %[[IDX]], %[[C0]]]
 
+// Same as the top example within this split, but with the inner vector
+// dim scalable. Note that this example only makes sense when "16 = [16]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @drop_two_inner_most_dim_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x[16]x1x1xf32>, %arg2: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+    {in_bounds = [true, true, true, true, true]}
+    : vector<1x16x[16]x1x1xf32>, memref<1x512x16x1x1xf32>
+  return
+}
+// CHECK:      func.func @drop_two_inner_most_dim_scalable_inner_dim
+// CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
+// CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
+// CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
+// CHECK-DAG:    %[[C0:.+]] = arith.constant 0 : index
+// CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]]
+// CHECK-SAME:     memref<1x512x16x1x1xf32> to memref<1x512x16xf32, strided<[8192, 16, 1]>>
+// CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<1x16x[16]x1x1xf32> to vector<1x16x[16]xf32>
+// CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]
+// CHECK-SAME:     [%[[C0]], %[[IDX]], %[[C0]]]
+
+// Same as the top example within this split, but the trailing unit dim was
+// replaced with a dyn dim - not supported
+
+func.func @negative_non_unit_trailing_dim(%arg0: memref<1x512x16x1x?xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+    {in_bounds = [true, true, true, true, true]}
+    : vector<1x16x16x1x1xf32>, memref<1x512x16x1x?xf32>
+  return
+}
+// CHECK:      func.func @negative_non_unit_trailing_dim
+// CHECK-NOT: memref.subview
+// CHECK-NOT: vector.shape_cast
+
+// Same as the top example within this split, but with a scalable unit dim in
+// the output vector - not supported
+
+func.func @negative_scalable_unit_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x[1]xf32>, %arg2: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+    {in_bounds = [true, true, true, true, true]}
+    : vector<1x16x16x1x[1]xf32>, memref<1x512x16x1x1xf32>
+  return
+}
+
+// CHECK:     func.func @negative_scalable_unit_dim
+// CHECK-NOT: memref.subview
+// CHECK-NOT: vector.shape_cast
+
 // -----
 
-func.func @drop_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
+func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]
     {in_bounds = [true, true, true, true]}
     : vector<1x16x16x1xf32>, memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>
   return
 }
-// CHECK:      func.func @drop_inner_most_dim_for_transfer_write
+// CHECK:      func.func @drop_inner_most_dim
 // CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
@@ -294,14 +345,14 @@ func.func @drop_inner_most_dim_for_transfer_write(%arg0: memref<1x512x16x1xf32,
 
 // -----
 
-func.func @outer_dyn_drop_inner_most_dim_for_transfer_write(%arg0: memref<?x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
+func.func @outer_dyn_drop_inner_most_dim(%arg0: memref<?x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true]}
     : vector<1x16x16x1xf32>, memref<?x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>
   return
 }
-// CHECK:      func.func @outer_dyn_drop_inner_most_dim_for_transfer_write
+// CHECK:      func.func @outer_dyn_drop_inner_most_dim
 // CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
@@ -325,30 +376,3 @@ func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], o
 // The inner most unit dims can not be dropped if the strides are not ones.
 // CHECK:     func.func @non_unit_strides
 // CHECK-NOT:   memref.subview
-
-// -----
-
-func.func @leading_scalable_dimension_transfer_write(%dest : memref<24x1xf32>, %vec: vector<[4]x1xf32>) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %dest[%c0, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>,  memref<24x1xf32>
-  return
-}
-// CHECK:      func.func @leading_scalable_dimension_transfer_write
-// CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
-// CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0] [24, 1] [1, 1] : memref<24x1xf32> to memref<24xf32, strided<[1]>>
-// CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
-// CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]{{.*}} {in_bounds = [true]} : vector<[4]xf32>, memref<24xf32, strided<[1]>>
-
-// -----
-
-// Negative test: [1] (scalable 1) is _not_ a unit dimension.
-func.func @trailing_scalable_one_dim_transfer_write(%dest : memref<24x1xf32>, %vec: vector<4x[1]xf32>, %index: index) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %dest[%index, %c0] {in_bounds = [true, true]} : vector<4x[1]xf32>,  memref<24x1xf32>
-  return
-}
-// CHECK:      func.func @trailing_scalable_one_dim_transfer_write
-// CHECK-NOT:    vector.shape_cast
-// CHECK:        vector.transfer_write {{.*}} : vector<4x[1]xf32>,  memref<24x1xf32>
-// CHECK-NOT:    vector.shape_cast