[Mlir-commits] [mlir] [mlir][vector] Update tests for collapse 6/n (nfc) (PR #98902)

Mon Jul 15 06:44:49 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-mlir-vector

@llvm/pr-subscribers-mlir

Author: Andrzej Warzyński (banach-space)

<details>
<summary>Changes</summary>

The main goal of this PR (and subsequent PRs), is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There's quite a few cases to consider, hence this is split into multiple
PRs.

In this PR, I am making the following changes:
* All input memrefs for `xfer_read` are are renamed as `%src`.
* All input memrefs for `xfer_write` are are renamed as `%dest`.
* All variables representing pad values for `xfer_read` are renamed as
  `%pad`.
* All vector variables (for `xfer_read` and `xfer_write`) are renamed as
  `%v`.
* Add `@contiguous_inner_most_non_zero_idx_in_bounds_scalable` for
  `xfer_read` (similar test already exists for `xfer_write`)
* All indiex variables are renamed as `%i` (1st index) and `%ii` (2nd
  index).

The above were marked as TODOs in the test file - these are not
resolved. In addition (to avoid sending another PR):
* `@drop_inner_most_dim` is deleted - it duplicates
  `@contiguous_inner_most` for xfer_write
* For consistency with other negative tests, renamed `@non_unit_strides`
  as `@negative_non_unit_strides` and added a similar test for
  `xfer_read`

This is a follow-up for: #94490, #94604, #94906, #96214, #96227


---

Patch is 35.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98902.diff


1 Files Affected:

- (modified) mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir (+138-129) 


``````````diff

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index beeb1362b7ab3..bc3c3c014c4d2 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,16 +1,14 @@
 // RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
 
-// TODO: Unify how memref and vectors are named
-
 //-----------------------------------------------------------------------------
 // 1. vector.transfer_read
 //-----------------------------------------------------------------------------
 
-func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @contiguous_inner_most(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %v : vector<1x8x1xf32>
 }
 
 //      CHECK: func @contiguous_inner_most(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
@@ -23,13 +21,13 @@ func.func @contiguous_inner_most(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1
 
 // Same as the top example within this split, but with the inner vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
+func.func @contiguous_inner_most_scalable_inner_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x[8]x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
-  return %0 : vector<1x[8]x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x[8]x1xf32>
+  return %v : vector<1x[8]x1xf32>
 }
 
 //      CHECK: func @contiguous_inner_most_scalable_inner_dim(%[[SRC:.+]]: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>
@@ -43,11 +41,11 @@ func.func @contiguous_inner_most_scalable_inner_dim(%in: memref<1x1x8x1xf32, str
 // Same as the top example within this split, but the trailing unit dim was
 // replaced with a dyn dim - not supported
 
-func.func @negative_dynamic_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
+func.func @negative_dynamic_trailing_dim(%src: memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x1xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
-  return %0 : vector<1x8x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x?xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x1xf32>
+  return %v : vector<1x8x1xf32>
 }
 
 //  CHECK-LABEL: func @negative_dynamic_trailing_dim
@@ -57,11 +55,11 @@ func.func @negative_dynamic_trailing_dim(%in: memref<1x1x8x?xf32, strided<[3072,
 // Same as the top example within this split, but with a "scalable unit" dim in
 // the output vector - not supported (scalable 1, [1], is _not_ a unit dimension).
 
-func.func @negative_scalable_one_trailing_dim(%in: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
+func.func @negative_scalable_one_trailing_dim(%src: memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>) -> vector<1x8x[1]xf32>{
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = vector.transfer_read %in[%c0, %c0, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
-  return %0 : vector<1x8x[1]xf32>
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%c0, %c0, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<1x1x8x1xf32, strided<[3072, 8, 1, 1], offset: ?>>, vector<1x8x[1]xf32>
+  return %v : vector<1x8x[1]xf32>
 }
 //  CHECK-LABEL: func @negative_scalable_one_trailing_dim
 //    CHECK-NOT: memref.subview
@@ -69,10 +67,10 @@ func.func @negative_scalable_one_trailing_dim(%in: memref<1x1x8x1xf32, strided<[
 
 // -----
 
-func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
+func.func @contiguous_inner_most_dynamic_outer(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<8x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<8x1xf32>
   return %v : vector<8x1xf32>
 }
 // CHECK: func.func @contiguous_inner_most_dynamic_outer
@@ -93,12 +91,12 @@ func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %memref: me
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
+func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%i: index, %ii: index, %memref: memref<?x?x8x1xf32>) -> vector<[8]x1xf32> {
   %c0 = arith.constant 0 : index
   %pad = arith.constant 0.0 : f32
-  %v = vector.transfer_read %memref[%a, %b, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
+  %v = vector.transfer_read %memref[%i, %ii, %c0, %c0], %pad {in_bounds = [true, true]} : memref<?x?x8x1xf32>, vector<[8]x1xf32>
   return %v : vector<[8]x1xf32>
 }
 // CHECK-LABEL:  func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim
@@ -118,11 +116,11 @@ func.func @contiguous_inner_most_outer_dim_dyn_scalable_inner_dim(%a: index, %b:
 
 // The index to be dropped is == 0, so it's safe to collapse. The other index
 // should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_in_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
@@ -135,11 +133,11 @@ func.func @contiguous_inner_most_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:ind
 // The index to be dropped is == 0, so it's safe to collapse. The "out of
 // bounds" attribute is too conservative and will be folded to "in bounds"
 // before the pattern runs. The other index should be preserved correctly.
-func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
   %c0 = arith.constant 0 : index
-  %1 = vector.transfer_read %A[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %c0], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_zero_idx_out_of_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
@@ -151,10 +149,10 @@ func.func @contiguous_inner_most_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i
 
 // The index to be dropped is unknown, but since it's "in bounds", it has to be
 // == 0. It's safe to collapse the corresponding dim.
-func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @contiguous_inner_most_non_zero_idx_in_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
@@ -164,12 +162,29 @@ func.func @contiguous_inner_most_non_zero_idx_in_bounds(%A: memref<16x1xf32>, %i
 // CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<8xf32>
 // CHECK:           vector.shape_cast %[[READ]] : vector<8xf32> to vector<8x1xf32>
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%src: memref<16x1xf32>, %i:index) -> (vector<[8]x1xf32>) {
+  %pad = arith.constant 0.0 : f32
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, true]} : memref<16x1xf32>, vector<[8]x1xf32>
+  return %v : vector<[8]x1xf32>
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) -> vector<[8]x1xf32> {
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[SV]]{{\[}}%[[IDX]]], %[[PAD]] {in_bounds = [true]} : memref<16xf32, strided<[1]>>, vector<[8]xf32>
+// CHECK:           vector.shape_cast %[[READ]] : vector<[8]xf32> to vector<[8]x1xf32>
+
 // The index to be dropped is unknown and "out of bounds" - not safe to
 // collapse.
-func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
+func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%src: memref<16x1xf32>, %i:index) -> (vector<8x1xf32>) {
   %pad = arith.constant 0.0 : f32
-  %1 = vector.transfer_read %A[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
-  return %1 : vector<8x1xf32>
+  %v = vector.transfer_read %src[%i, %i], %pad {in_bounds = [true, false]} : memref<16x1xf32>, vector<8x1xf32>
+  return %v : vector<8x1xf32>
 }
 // CHECK-LABEL:   func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(
 // CHECK-NOT:     memref.subview
@@ -179,12 +194,12 @@ func.func @negative_contiguous_inner_most_non_zero_idx_out_of_bounds(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview(%src: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
-  return %1 : vector<4x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<4x1xf32>
+  return %v : vector<4x1xf32>
 }
 //      CHECK: func @contiguous_inner_most_dim_with_subview(%[[SRC:.+]]: memref<1000x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
@@ -195,14 +210,14 @@ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:ind
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%src: memref<1000x1xf32>, %i:index, %ii:index) -> (vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0], %cst {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
-  return %1 : vector<[4]x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0], %pad {in_bounds = [true, true]} : memref<40x1xf32, strided<[1, 1], offset: ?>>, vector<[4]x1xf32>
+  return %v : vector<[4]x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
 //  CHECK-SAME:   %[[SRC:.+]]: memref<1000x1xf32>
@@ -213,12 +228,12 @@ func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<
 
 // -----
 
-func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d(%src: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<4x1x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
-  return %1 : vector<4x1x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<4x1x1xf32>
+  return %v : vector<4x1x1xf32>
 }
 //      CHECK: func @contiguous_inner_most_dim_with_subview_2d(%[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<4x1x1xf32>
 //      CHECK:   %[[SRC_0:.+]] = memref.subview %[[SRC]]
@@ -229,14 +244,14 @@ func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%src: memref<1000x1x1xf32>, %i:index, %ii:index) -> (vector<[4]x1x1xf32>) {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
-  %1 = vector.transfer_read %0[%ii, %c0, %c0], %cst {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
-  return %1 : vector<[4]x1x1xf32>
+  %pad = arith.constant 0.0 : f32
+  %sv = memref.subview %src[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  %v = vector.transfer_read %sv[%ii, %c0, %c0], %pad {in_bounds = [true, true, true]} : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>, vector<[4]x1x1xf32>
+  return %v : vector<[4]x1x1xf32>
 }
 // CHECK-LABEL: func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(
 //  CHECK-SAME:   %[[SRC:.+]]: memref<1000x1x1xf32>, %[[II:.+]]: index, %[[J:.+]]: index) -> vector<[4]x1x1xf32>
@@ -251,11 +266,11 @@ func.func @contiguous_inner_most_dim_with_subview_2d_scalable_inner_dim(%A: memr
 
 // NOTE: This is an out-of-bounds access.
 
-func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8xf32> {
+func.func @negative_non_unit_inner_vec_dim(%src: memref<4x1xf32>) -> vector<4x8xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x1xf32>, vector<4x8xf32>
-  return %0 : vector<4x8xf32>
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%c0, %c0], %pad : memref<4x1xf32>, vector<4x8xf32>
+  return %v : vector<4x8xf32>
 }
 //      CHECK: func.func @negative_non_unit_inner_vec_dim
 //  CHECK-NOT:   memref.subview
@@ -263,11 +278,11 @@ func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>) -> vector<4x8
 
 // -----
 
-func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<4x1xf32> {
+func.func @negative_non_unit_inner_memref_dim(%src: memref<4x8xf32>) -> vector<4x1xf32> {
   %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = vector.transfer_read %arg0[%c0, %c0], %cst : memref<4x8xf32>, vector<4x1xf32>
-  return %0 : vector<4x1xf32>
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%c0, %c0], %pad : memref<4x8xf32>, vector<4x1xf32>
+  return %v : vector<4x1xf32>
 }
 //      CHECK: func.func @negative_non_unit_inner_memref_dim
 //  CHECK-NOT:   memref.subview
@@ -275,13 +290,28 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<
 
 // -----
 
+// The inner most unit dims can not be dropped if the strides are not ones.
+
+func.func @negative_non_unit_strides(%src: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %i: index) -> vector<16x16x1xf32> {
+  %c0 = arith.constant 0 : index
+  %pad = arith.constant 0.000000e+00 : f32
+  %v = vector.transfer_read %src[%i, %c0, %c0], %pad
+    {in_bounds = [true, true, true]}
+    : memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, vector<16x16x1xf32>
+  return %v : vector<16x16x1xf32>
+}
+// CHECK:     func.func @negative_non_unit_strides
+// CHECK-NOT:   memref.subview
+
+// -----
+
 //-----------------------------------------------------------------------------
 // 2. vector.transfer_write
 //-----------------------------------------------------------------------------
 
-func.func @contiguous_inner_most(%arg0: memref<1x512x16x1x1xf32>, %arg1:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/98902