[Mlir-commits] [mlir] 4ecb538 - [mlir][vector] Update tests for collapse 6/n (nfc) (#98902)

Tue Jul 16 08:42:01 PDT 2024

Author: Andrzej Warzyński
Date: 2024-07-16T16:41:57+01:00
New Revision: 4ecb5383e12f1be91d6de06bb377d35964c01a7a

URL: https://github.com/llvm/llvm-project/commit/4ecb5383e12f1be91d6de06bb377d35964c01a7a
DIFF: https://github.com/llvm/llvm-project/commit/4ecb5383e12f1be91d6de06bb377d35964c01a7a.diff

LOG: [mlir][vector] Update tests for collapse 6/n (nfc) (#98902)

The main goal of this PR (and subsequent PRs), is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There's quite a few cases to consider, hence this is split into multiple
PRs.

In this PR, I am making the following changes:
* All input memrefs for `xfer_read` are are renamed as `%src`.
* All input memrefs for `xfer_write` are are renamed as `%dest`.
* All variables representing pad values for `xfer_read` are renamed as
  `%pad`.
* All vector variables (for `xfer_read` and `xfer_write`) are renamed as
  `%v`.
* Add `@contiguous_inner_most_non_zero_idx_in_bounds_scalable` for
  `xfer_read` (similar test already exists for `xfer_write`)
* All index variables are renamed as `%i` (1st index) and `%ii` (2nd
  index).

The above were marked as TODOs in the test file - these are not
resolved. In addition (to avoid sending another PR):
* `@drop_inner_most_dim` is deleted - it duplicates
  `@contiguous_inner_most` for xfer_write
* For consistency with other negative tests, renamed `@non_unit_strides`
  as `@negative_non_unit_strides` and added a similar test for
  `xfer_read`
* `@non_unit_strides` is renamed as `@negative_non_unit_strides` and
  a similar test is added for `xfer_read`.

This is a follow-up for: #94490, #94604, #94906, #96214, #96227

Added: 
    

Modified: 
    mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index beeb1362b7ab3..bc3c3c014c4d2 100644

--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,16 +1,14 @@
 // RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
 
-// TODO: Unify how memref and vectors are named
-
 //-----------------------------------------------------------------------------
 // 1. vector.transfer_read
 //-----------------------------------------------------------------------------
 
-func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @contiguous_inner_most(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %v : vector<1x8x1xf32>
 }
 
 //      CHECK: func @contiguous_inner_most(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
@@ -23,13 +21,13 @@ func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1
 
 // Same as the top example within this split, but with the inner vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
+func.func @contiguous_inner_most_scalable_inner_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
-  return %0 : vector<1x[8]x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
+  return %v : vector<1x[8]x1xf32>
 }
 
 //      CHECK: func @contiguous_inner_most_scalable_inner_dim(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
@@ -43,11 +41,11 @@ func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, str
 // Same as the top example within this split, but the trailing unit dim was
 // replaced with a dyn dim - not supported
 
-func.func @negative_dynamic_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @negative_dynamic_trailing_dim(%src: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %v : vector<1x8x1xf32>
 }
 
 //  CHECK-LABEL: func @negative_dynamic_trailing_dim
@@ -57,11 +55,11 @@ func.func @negative_dynamic_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072,
 // Same as the top example within this split, but with a "scalable unit" dim in
 // the output vector - not supported (scalable 1, [1], is _not_ a unit dimension).
 
-func.func @negative_scalable_one_trailing_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
+func.func @negative_scalable_one_trailing_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
-  return %0 : vector<1x8x[1]xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
+  return %v : vector<1x8x[1]xf32>
 }
 //  CHECK-LABEL: func @negative_scalable_one_trailing_dim
 //    CHECK-NOT: memref.subview
@@ -69,10 +67,10 @@ func.func @negative_scalable_one_trailing_dim(%in: memref<1x1x8x1xf32, strided<[
 
 // -----
 
-func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
+func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
 // CHECK: func.func @contiguous_inner_most_dynamic_outer
@@ -93,12 +91,12 @@ func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: me
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
+func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
   return %v : vector<[8]x1xf32>
 }
 // CHECK-LABEL:  func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
@@ -118,11 +116,11 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b:
 
 // The index to be dropped is == 0, so it's safe to collapse. The other index
 // should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_in_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
@@ -135,11 +133,11 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:ind
 // The index to be dropped is == 0, so it's safe to collapse. The "out of
 // bounds" attribute is too conservative and will be folded to "in bounds"
 // before the pattern runs. The other index should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_out_of_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
@@ -151,10 +149,10 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i
 
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
-func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_non_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
@@ -164,12 +162,29 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i
 // CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
 // CHECK:           vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%src: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<[8]x1xf32>
+  return %v : vector<[8]x1xf32>
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) -> vector<[8]x1xf32> {
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<[8]xf32>
+// CHECK:           vector.shape_cast %[[READ]] : vector<[8]xf32> to vector<[8]x1xf32>
+
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
-func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(
 // CHECK-NOT:     memref.subview
@@ -179,12 +194,12 @@ func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%src: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
-  return %1 : vector<4x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
+  return %v : vector<4x1xf32>
 }
 //      CHECK: func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
@@ -195,14 +210,14 @@ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:ind
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%src: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
-  return %1 : vector<[4]x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
+  return %v : vector<[4]x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
 //  CHECK-SAME:   %[[SRC:.+]]: memref<1000x1xf32>
@@ -213,12 +228,12 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%src: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
-  return %1 : vector<4x1x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
+  return %v : vector<4x1x1xf32>
 }
 //      CHECK: func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
@@ -229,14 +244,14 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%src: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
-  return %1 : vector<[4]x1x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
+  return %v : vector<[4]x1x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
 //  CHECK-SAME:   %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
@@ -251,11 +266,11 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memr
 
 // NOTE: This is an out-of-bounds access.
 
-func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8xf32> {
+func.func @negative_non_unit_inner_vec_dim(%src: memref<4x1xf32>) -> vector<4x8xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x1xf32>, vector<4x8xf32>
-  return %0 : vector<4x8xf32>
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%c0, %c0], %pad : memref<4x1xf32>, vector<4x8xf32>
+  return %v : vector<4x8xf32>
 }
 //      CHECK: func.func @negative_non_unit_inner_vec_dim
 //  CHECK-NOT:   memref.subview
@@ -263,11 +278,11 @@ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8
 
 // -----
 
-func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<4x1xf32> {
+func.func @negative_non_unit_inner_memref_dim(%src: memref<4x8xf32>) -> vector<4x1xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x8xf32>, vector<4x1xf32>
-  return %0 : vector<4x1xf32>
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%c0, %c0], %pad : memref<4x8xf32>, vector<4x1xf32>
+  return %v : vector<4x1xf32>
 }
 //      CHECK: func.func @negative_non_unit_inner_memref_dim
 //  CHECK-NOT:   memref.subview
@@ -275,13 +290,28 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<
 
 // -----
 
+// The inner most unit dims can not be dropped if the strides are not ones.
+
+func.func @negative_non_unit_strides(%src: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %i: index) -> vector<16x16x1xf32> {
+  %c0 = arith.constant 0 : index
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%i, %c0, %c0], %pad
+    {in_bounds = [true, true, true]}
+    : memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, vector<16x16x1xf32>
+  return %v : vector<16x16x1xf32>
+}
+// CHECK:     func.func @negative_non_unit_strides
+// CHECK-NOT:   memref.subview
+
+// -----
+
 //-----------------------------------------------------------------------------
 // 2. vector.transfer_write
 //-----------------------------------------------------------------------------
 
-func.func @contiguous_inner_most(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+func.func @contiguous_inner_most(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x16x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x1xf32>
   return
@@ -299,11 +329,11 @@ func.func @contiguous_inner_most(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<
 
 // Same as the top example within this split, but with the inner vector
 // dim scalable. Note that this example only makes sense when "16 = [16]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x[16]x1x1xf32>, %arg2: index) {
+func.func @contiguous_inner_most_scalable_inner_dim(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x[16]x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x[16]x1x1xf32>, memref<1x512x16x1x1xf32>
   return
@@ -322,9 +352,9 @@ func.func @contiguous_inner_most_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf
 // Same as the top example within this split, but the trailing unit dim was
 // replaced with a dyn dim - not supported
 
-func.func @negative_dynamic_trailing_dim(%arg0: memref<1x512x16x1x?xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+func.func @negative_dynamic_trailing_dim(%dest: memref<1x512x16x1x?xf32>, %v: vector<1x16x16x1x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x?xf32>
   return
@@ -336,9 +366,9 @@ func.func @negative_dynamic_trailing_dim(%arg0: memref<1x512x16x1x?xf32>, %arg1:
 // Same as the top example within this split, but with a "scalable unit" dim in
 // the input vector - not supported (scalable 1, [1], is _not_ a unit dimension).
 
-func.func @negative_scalable_one_trailing_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x[1]xf32>, %arg2: index) {
+func.func @negative_scalable_one_trailing_dim(%dest: memref<1x512x16x1x1xf32>, %v: vector<1x16x16x1x[1]xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
+  vector.transfer_write %v, %dest[%c0, %i, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x[1]xf32>, memref<1x512x16x1x1xf32>
   return
@@ -350,9 +380,9 @@ func.func @negative_scalable_one_trailing_dim(%arg0: memref<1x512x16x1x1xf32>, %
 
 // -----
 
-func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %arg0: memref<?x?x16x1xf32>, %arg1: vector<8x1xf32>) {
+func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %dest: memref<?x?x16x1xf32>, %v: vector<8x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%a, %b, %c0, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<?x?x16x1xf32>
+  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<?x?x16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer(
@@ -369,11 +399,11 @@ func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %arg0: memr
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b: index, %arg0: memref<?x?x16x1xf32>, %arg1: vector<[8]x1xf32>) {
+func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%i: index, %ii: index, %dest: memref<?x?x16x1xf32>, %v: vector<[8]x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%a, %b, %c0, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
+  vector.transfer_write %v, %dest[%i, %ii, %c0, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
   return
 }
 // CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(
@@ -395,9 +425,9 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b:
 
 // The index to be dropped is == 0, so it's safe to collapse. The other index
 // should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+func.func @contiguous_inner_most_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_in_bounds(
@@ -411,9 +441,9 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %ar
 // The index to be dropped is == 0, so it's safe to collapse. The "out of
 // bounds" attribute is too conservative and will be folded to "in bounds"
 // before the pattern runs. The other index should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  vector.transfer_write %v, %dest[%i, %c0] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_out_of_bounds
@@ -426,8 +456,8 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>,
 
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
-func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_non_zero_idx_in_bounds
@@ -442,10 +472,8 @@ func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
 // vscale = 1). This is assumed via the `in_bounds` attribute.
 
-// TODO: Add a similar test for xfer_write
-
-func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%dest: memref<16x1xf32>, %v: vector<[8]x1xf32>, %i: index) {
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(
@@ -458,8 +486,8 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%arg0: memref<1
 
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
-func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%dest: memref<16x1xf32>, %v: vector<8x1xf32>, %i: index) {
+  vector.transfer_write %v, %dest[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
   return
 }
 // CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds
@@ -471,10 +499,10 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0:
 
 // Verify that the transformation does work even when the input is a "subview"
 
-func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%dest: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
   return
 }
@@ -492,10 +520,10 @@ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:ind
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
 // vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%dest: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
   return
 }
@@ -511,10 +539,10 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%dest: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<4x1x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   return
 }
@@ -529,12 +557,12 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%dest: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %0 = memref.subview %dest[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
   return
 }
@@ -549,31 +577,11 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x
 
 // -----
 
-func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]
-    {in_bounds = [true, true, true, true]}
-    : vector<1x16x16x1xf32>, memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>
-  return
-}
-// CHECK:      func.func @drop_inner_most_dim
-// CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
-// CHECK-DAG:    %[[C0:.+]] = arith.constant 0 : index
-// CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]]
-// CHECK-SAME:     memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>> to memref<1x512x16xf32, strided<[8192, 16, 1], offset: ?>>
-// CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<1x16x16x1xf32> to vector<1x16x16xf32>
-// CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]
-// CHECK-SAME:     [%[[C0]], %[[IDX]], %[[C0]]]
-
-// -----
-
 // NOTE: This is an out-of-bounds access.
 
-func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+func.func @negative_non_unit_inner_vec_dim(%dest: memref<4x1xf32>, %vec: vector<4x8xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
   return
 }
 //      CHECK: func.func @negative_non_unit_inner_vec_dim
@@ -582,9 +590,9 @@ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<
 
 // -----
 
-func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+func.func @negative_non_unit_inner_memref_dim(%dest: memref<4x8xf32>, %vec: vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+  vector.transfer_write %vec, %dest[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
   return
 }
 //      CHECK: func.func @negative_non_unit_inner_memref_dim
@@ -593,13 +601,14 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vect
 
 // -----
 
-func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
+// The inner most unit dims can not be dropped if the strides are not ones.
+
+func.func @negative_non_unit_strides(%dest: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %v: vector<16x16x1xf32>, %i: index) {
   %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]
+  vector.transfer_write %v, %dest[%i, %c0, %c0]
     {in_bounds = [true, true, true]}
     : vector<16x16x1xf32>, memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>
   return
 }
-// The inner most unit dims can not be dropped if the strides are not ones.
-// CHECK:     func.func @non_unit_strides
+// CHECK:     func.func @negative_non_unit_strides
 // CHECK-NOT:   memref.subview