[Mlir-commits] [mlir] [mlir][vector] Make the in_bounds attribute mandatory (PR #97049)

Fri Jun 28 05:48:35 PDT 2024

https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/97049

WIP


>From 9e1cf900649d4a99671f2fe39393b8212acd2cb4 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 19 Jun 2024 07:57:59 +0100
Subject: [PATCH] [mlir][vector] Make the in_bounds attribute mandatory

WIP
---
 .../mlir/Dialect/Vector/IR/VectorOps.td       |  4 +-
 .../mlir/Interfaces/VectorInterfaces.td       |  4 +-
 mlir/lib/Dialect/Vector/IR/VectorOps.cpp      |  7 +-
 .../Vector/Transforms/LowerVectorMask.cpp     |  4 +-
 .../Vector/Transforms/LowerVectorTransfer.cpp | 10 +--
 .../Conversion/VectorToSCF/vector-to-scf.mlir |  2 +-
 .../Linalg/vectorization-with-patterns.mlir   |  6 +-
 mlir/test/Dialect/Vector/ops.mlir             |  4 +-
 .../loop-invariant-subset-hoisting.mlir       | 78 +++++++++----------
 9 files changed, 56 insertions(+), 63 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 56d866ac5b40c..4e4d53582ae30 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -1321,7 +1321,7 @@ def Vector_TransferReadOp :
                    AffineMapAttr:$permutation_map,
                    AnyType:$padding,
                    Optional<VectorOf<[I1]>>:$mask,
-                   OptionalAttr<BoolArrayAttr>:$in_bounds)>,
+                   BoolArrayAttr:$in_bounds)>,
     Results<(outs AnyVectorOfAnyRank:$vector)> {
 
   let summary = "Reads a supervector from memory into an SSA vector value.";
@@ -1565,7 +1565,7 @@ def Vector_TransferWriteOp :
                    Variadic<Index>:$indices,
                    AffineMapAttr:$permutation_map,
                    Optional<VectorOf<[I1]>>:$mask,
-                   OptionalAttr<BoolArrayAttr>:$in_bounds)>,
+                   BoolArrayAttr:$in_bounds)>,
     Results<(outs Optional<AnyRankedTensor>:$result)> {
 
   let summary = "The vector.transfer_write op writes a supervector to memory.";
diff --git a/mlir/include/mlir/Interfaces/VectorInterfaces.td b/mlir/include/mlir/Interfaces/VectorInterfaces.td
index 781d6d3e3f813..f6682f2eabe1e 100644
--- a/mlir/include/mlir/Interfaces/VectorInterfaces.td
+++ b/mlir/include/mlir/Interfaces/VectorInterfaces.td
@@ -98,7 +98,7 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> {
         dimension whether it is in-bounds or not. (Broadcast dimensions are
         always in-bounds).
       }],
-      /*retTy=*/"::std::optional<::mlir::ArrayAttr>",
+      /*retTy=*/"::mlir::ArrayAttr",
       /*methodName=*/"getInBounds",
       /*args=*/(ins)
     >,
@@ -242,7 +242,7 @@ def VectorTransferOpInterface : OpInterface<"VectorTransferOpInterface"> {
         return true;
       if (!$_op.getInBounds())
         return false;
-      auto inBounds = ::llvm::cast<::mlir::ArrayAttr>(*$_op.getInBounds());
+      auto inBounds = $_op.getInBounds();
       return ::llvm::cast<::mlir::BoolAttr>(inBounds[dim]).getValue();
     }
 
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 2bf4f16f96e6a..90a83ebe5c8aa 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -3859,9 +3859,6 @@ static void printTransferAttrs(OpAsmPrinter &p, VectorTransferOpInterface op) {
   elidedAttrs.push_back(TransferReadOp::getOperandSegmentSizeAttr());
   if (op.getPermutationMap().isMinorIdentity())
     elidedAttrs.push_back(op.getPermutationMapAttrName());
-  // Elide in_bounds attribute if all dims are out-of-bounds.
-  if (llvm::none_of(op.getInBoundsValues(), [](bool b) { return b; }))
-    elidedAttrs.push_back(op.getInBoundsAttrName());
   p.printOptionalAttrDict(op->getAttrs(), elidedAttrs);
 }
 
@@ -3970,7 +3967,7 @@ LogicalResult TransferReadOp::verify() {
   if (failed(verifyTransferOp(cast<VectorTransferOpInterface>(getOperation()),
                               shapedType, vectorType, maskType,
                               inferredMaskType, permutationMap,
-                              getInBounds() ? *getInBounds() : ArrayAttr())))
+                              getInBounds())))
     return failure();
 
   if (auto sourceVectorElementType =
@@ -4356,7 +4353,7 @@ LogicalResult TransferWriteOp::verify() {
   if (failed(verifyTransferOp(cast<VectorTransferOpInterface>(getOperation()),
                               shapedType, vectorType, maskType,
                               inferredMaskType, permutationMap,
-                              getInBounds() ? *getInBounds() : ArrayAttr())))
+                              getInBounds())))
     return failure();
 
   return verifyPermutationMap(permutationMap,
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp
index f53bb5157eb37..dfeb7bc53adad 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp
@@ -224,7 +224,7 @@ struct MaskedTransferReadOpPattern
     rewriter.replaceOpWithNewOp<TransferReadOp>(
         maskingOp.getOperation(), readOp.getVectorType(), readOp.getSource(),
         readOp.getIndices(), readOp.getPermutationMap(), readOp.getPadding(),
-        maskingOp.getMask(), readOp.getInBounds().value_or(ArrayAttr()));
+        maskingOp.getMask(), readOp.getInBounds());
     return success();
   }
 };
@@ -246,7 +246,7 @@ struct MaskedTransferWriteOpPattern
     rewriter.replaceOpWithNewOp<TransferWriteOp>(
         maskingOp.getOperation(), resultType, writeOp.getVector(),
         writeOp.getSource(), writeOp.getIndices(), writeOp.getPermutationMap(),
-        maskingOp.getMask(), writeOp.getInBounds().value_or(ArrayAttr()));
+        maskingOp.getMask(), writeOp.getInBounds());
     return success();
   }
 };
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
index c31c51489ecc9..a8a6fef284907 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp
@@ -132,10 +132,8 @@ struct TransferReadPermutationLowering
     }
 
     // Transpose in_bounds attribute.
-    ArrayAttr newInBoundsAttr =
-        op.getInBounds() ? inverseTransposeInBoundsAttr(
-                               rewriter, op.getInBounds().value(), permutation)
-                         : ArrayAttr();
+    ArrayAttr newInBoundsAttr = inverseTransposeInBoundsAttr(
+        rewriter, op.getInBounds(), permutation);
 
     // Generate new transfer_read operation.
     VectorType newReadType = VectorType::get(
@@ -208,9 +206,7 @@ struct TransferWritePermutationLowering
 
     // Transpose in_bounds attribute.
     ArrayAttr newInBoundsAttr =
-        op.getInBounds() ? inverseTransposeInBoundsAttr(
-                               rewriter, op.getInBounds().value(), permutation)
-                         : ArrayAttr();
+        inverseTransposeInBoundsAttr(rewriter, op.getInBounds(), permutation);
 
     // Generate new transfer_write operation.
     Value newVec = rewriter.create<vector::TransposeOp>(
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index e1babdd2f1f63..ea57e2afbaa2b 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -174,7 +174,7 @@ func.func @materialize_write(%M: index, %N: index, %O: index, %P: index) {
   // CHECK:                      scf.for %[[I6:.*]] = %[[C0]] to %[[C1]] step %[[C1]] {
   // CHECK:                        %[[S0:.*]] = affine.apply #[[$ADD]](%[[I2]], %[[I6]])
   // CHECK:                        %[[VEC:.*]] = memref.load %[[VECTOR_VIEW3]][%[[I4]], %[[I5]], %[[I6]]] : memref<3x4x1xvector<5xf32>>
-  // CHECK:                        vector.transfer_write %[[VEC]], %{{.*}}[%[[S3]], %[[S1]], %[[S0]], %[[I3]]] : vector<5xf32>, memref<?x?x?x?xf32>
+  // CHECK:                        vector.transfer_write %[[VEC]], %{{.*}}[%[[S3]], %[[S1]], %[[S0]], %[[I3]]] {in_bounds = [false]} : vector<5xf32>, memref<?x?x?x?xf32>
   // CHECK:                      }
   // CHECK:                    }
   // CHECK:                  }
diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
index d7ff1ded9d933..7176dff9bc857 100644
--- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
@@ -950,7 +950,7 @@ module attributes {transform.with_named_sequence} {
 //   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
-//       CHECK:   %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] {in_bounds = [false, false]} : tensor<5x6xf32>, vector<7x9xf32>
 //       CHECK:   return %[[RESULT]]
 func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> {
   %c0 = arith.constant 0 : index
@@ -984,7 +984,7 @@ func.func private @make_vector() -> vector<7x9xf32>
 //   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
 //       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
-//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32>
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] {in_bounds = [false, false]} : vector<7x9xf32>, tensor<5x6xf32>
 //       CHECK:   return %[[RESULT]]
 func.func @pad_and_transfer_write_static(
     %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
@@ -1021,7 +1021,7 @@ func.func private @make_vector() -> vector<7x9xf32>
 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
 //       CHECK:   %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
 //       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
-//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<?x6xf32>
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] {in_bounds = [false, false]} : vector<7x9xf32>, tensor<?x6xf32>
 //       CHECK:   return %[[RESULT]]
 func.func @pad_and_transfer_write_dynamic_static(
     %arg0: tensor<?x?xf32>, %size: index, %padding: index) -> tensor<?x6xf32> {
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index c868c881d079a..afe62a2427fb0 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -78,7 +78,7 @@ func.func @vector_transfer_ops(%arg0: memref<?x?xf32>,
   vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, memref<?x?xf32>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
   vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
-  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
+  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
   vector.transfer_write %5, %arg1[%c3, %c3] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, memref<?x?xvector<4x3xf32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, memref<?x?xvector<4x3xi32>>
   vector.transfer_write %6, %arg2[%c3, %c3] : vector<5x24xi8>, memref<?x?xvector<4x3xi32>>
@@ -135,7 +135,7 @@ func.func @vector_transfer_ops_tensor(%arg0: tensor<?x?xf32>,
   %9 = vector.transfer_write %1, %arg0[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d1, d0)>} : vector<3x7xf32>, tensor<?x?xf32>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
   %10 = vector.transfer_write %4, %arg1[%c3, %c3] {permutation_map = affine_map<(d0, d1)->(d0, d1)>} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
-  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
+  // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
   %11 = vector.transfer_write %5, %arg1[%c3, %c3] {in_bounds = [false, false]} : vector<1x1x4x3xf32>, tensor<?x?xvector<4x3xf32>>
   // CHECK: vector.transfer_write %{{.*}}, %{{.*}}[%[[C3]], %[[C3]]] : vector<5x24xi8>, tensor<?x?xvector<4x3xi32>>
   %12 = vector.transfer_write %6, %arg2[%c3, %c3] : vector<5x24xi8>, tensor<?x?xvector<4x3xi32>>
diff --git a/mlir/test/Transforms/loop-invariant-subset-hoisting.mlir b/mlir/test/Transforms/loop-invariant-subset-hoisting.mlir
index 3a78287a0dcad..54e2d92f9a952 100644
--- a/mlir/test/Transforms/loop-invariant-subset-hoisting.mlir
+++ b/mlir/test/Transforms/loop-invariant-subset-hoisting.mlir
@@ -326,12 +326,12 @@ func.func @hoist_vector_transfer_pairs_tensor(
               %arg9 = %arg3,  %arg10 = %arg4, %arg11 = %arg5)
     -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>,
        tensor<?x?xf32>, tensor<?x?xf32>)  {
-      %r0 = vector.transfer_read %arg7[%c0, %c0], %cst: tensor<?x?xf32>, vector<1xf32>
-      %r1 = vector.transfer_read %arg6[%i, %i], %cst: tensor<?x?xf32>, vector<2xf32>
-      %r3 = vector.transfer_read %arg9[%c0, %c0], %cst: tensor<?x?xf32>, vector<4xf32>
+      %r0 = vector.transfer_read %arg7[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<1xf32>
+      %r1 = vector.transfer_read %arg6[%i, %i], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
+      %r3 = vector.transfer_read %arg9[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<4xf32>
       "test.some_crippling_use"(%arg10) : (tensor<?x?xf32>) -> ()
-      %r4 = vector.transfer_read %arg10[%c0, %c0], %cst: tensor<?x?xf32>, vector<5xf32>
-      %r5 = vector.transfer_read %arg11[%c0, %c0], %cst: tensor<?x?xf32>, vector<6xf32>
+      %r4 = vector.transfer_read %arg10[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<5xf32>
+      %r5 = vector.transfer_read %arg11[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<6xf32>
       "test.some_crippling_use"(%arg11) : (tensor<?x?xf32>) -> ()
       %u0 = "test.some_use"(%r0) : (vector<1xf32>) -> vector<1xf32>
       %u1 = "test.some_use"(%r1) : (vector<2xf32>) -> vector<2xf32>
@@ -339,12 +339,12 @@ func.func @hoist_vector_transfer_pairs_tensor(
       %u3 = "test.some_use"(%r3) : (vector<4xf32>) -> vector<4xf32>
       %u4 = "test.some_use"(%r4) : (vector<5xf32>) -> vector<5xf32>
       %u5 = "test.some_use"(%r5) : (vector<6xf32>) -> vector<6xf32>
-      %w1 = vector.transfer_write %u0, %arg7[%c0, %c0] : vector<1xf32>, tensor<?x?xf32>
-      %w0 = vector.transfer_write %u1, %arg6[%i, %i] : vector<2xf32>, tensor<?x?xf32>
-      %w2 = vector.transfer_write %u2, %arg8[%c0, %c0] : vector<3xf32>, tensor<?x?xf32>
-      %w3 = vector.transfer_write %u3, %arg9[%c0, %c0] : vector<4xf32>, tensor<?x?xf32>
-      %w4 = vector.transfer_write %u4, %arg10[%c0, %c0] : vector<5xf32>, tensor<?x?xf32>
-      %w5 = vector.transfer_write %u5, %arg11[%c0, %c0] : vector<6xf32>, tensor<?x?xf32>
+      %w1 = vector.transfer_write %u0, %arg7[%c0, %c0] {in_bounds=[true]} : vector<1xf32>, tensor<?x?xf32>
+      %w0 = vector.transfer_write %u1, %arg6[%i, %i] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
+      %w2 = vector.transfer_write %u2, %arg8[%c0, %c0] {in_bounds=[true]} : vector<3xf32>, tensor<?x?xf32>
+      %w3 = vector.transfer_write %u3, %arg9[%c0, %c0] {in_bounds=[true]} : vector<4xf32>, tensor<?x?xf32>
+      %w4 = vector.transfer_write %u4, %arg10[%c0, %c0] {in_bounds=[true]} : vector<5xf32>, tensor<?x?xf32>
+      %w5 = vector.transfer_write %u5, %arg11[%c0, %c0] {in_bounds=[true]} : vector<6xf32>, tensor<?x?xf32>
       "test.some_crippling_use"(%w3) : (tensor<?x?xf32>) -> ()
       scf.yield %w0, %w1, %w2, %w3, %w4, %w5 :
         tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>,
@@ -415,14 +415,14 @@ func.func @hoist_vector_transfer_pairs_disjoint_tensor(
     iter_args(%arg4 = %arg0, %arg5 = %arg1, %arg6 = %arg2,
               %arg7 = %arg3)
     -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>) {
-      %r00 = vector.transfer_read %arg5[%c0, %c0], %cst: tensor<?x?xf32>, vector<2xf32>
-      %r01 = vector.transfer_read %arg5[%c0, %c1], %cst: tensor<?x?xf32>, vector<2xf32>
-      %r20 = vector.transfer_read %arg6[%c0, %c0], %cst: tensor<?x?xf32>, vector<3xf32>
-      %r21 = vector.transfer_read %arg6[%c0, %c3], %cst: tensor<?x?xf32>, vector<3xf32>
-      %r30 = vector.transfer_read %arg7[%c0, %random_index], %cst: tensor<?x?xf32>, vector<4xf32>
-      %r31 = vector.transfer_read %arg7[%c1, %random_index], %cst: tensor<?x?xf32>, vector<4xf32>
-      %r10 = vector.transfer_read %arg4[%i, %i], %cst: tensor<?x?xf32>, vector<2xf32>
-      %r11 = vector.transfer_read %arg4[%random_index, %random_index], %cst: tensor<?x?xf32>, vector<2xf32>
+      %r00 = vector.transfer_read %arg5[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
+      %r01 = vector.transfer_read %arg5[%c0, %c1], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
+      %r20 = vector.transfer_read %arg6[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<3xf32>
+      %r21 = vector.transfer_read %arg6[%c0, %c3], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<3xf32>
+      %r30 = vector.transfer_read %arg7[%c0, %random_index], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<4xf32>
+      %r31 = vector.transfer_read %arg7[%c1, %random_index], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<4xf32>
+      %r10 = vector.transfer_read %arg4[%i, %i], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
+      %r11 = vector.transfer_read %arg4[%random_index, %random_index], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
       %u00 = "test.some_use"(%r00) : (vector<2xf32>) -> vector<2xf32>
       %u01 = "test.some_use"(%r01) : (vector<2xf32>) -> vector<2xf32>
       %u20 = "test.some_use"(%r20) : (vector<3xf32>) -> vector<3xf32>
@@ -431,14 +431,14 @@ func.func @hoist_vector_transfer_pairs_disjoint_tensor(
       %u31 = "test.some_use"(%r31) : (vector<4xf32>) -> vector<4xf32>
       %u10 = "test.some_use"(%r10) : (vector<2xf32>) -> vector<2xf32>
       %u11 = "test.some_use"(%r11) : (vector<2xf32>) -> vector<2xf32>
-      %w10 = vector.transfer_write %u00, %arg5[%c0, %c0] : vector<2xf32>, tensor<?x?xf32>
-      %w11 = vector.transfer_write %u01, %w10[%c0, %c1] : vector<2xf32>, tensor<?x?xf32>
-      %w20 = vector.transfer_write %u20, %arg6[%c0, %c0] : vector<3xf32>, tensor<?x?xf32>
-      %w21 = vector.transfer_write %u21, %w20[%c0, %c3] : vector<3xf32>, tensor<?x?xf32>
-      %w30 = vector.transfer_write %u30, %arg7[%c0, %random_index] : vector<4xf32>, tensor<?x?xf32>
-      %w31 = vector.transfer_write %u31, %w30[%c1, %random_index] : vector<4xf32>, tensor<?x?xf32>
-      %w00 = vector.transfer_write %u10, %arg4[%i, %i] : vector<2xf32>, tensor<?x?xf32>
-      %w01 = vector.transfer_write %u11, %w00[%random_index, %random_index] : vector<2xf32>, tensor<?x?xf32>
+      %w10 = vector.transfer_write %u00, %arg5[%c0, %c0] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
+      %w11 = vector.transfer_write %u01, %w10[%c0, %c1] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
+      %w20 = vector.transfer_write %u20, %arg6[%c0, %c0] {in_bounds=[true]} : vector<3xf32>, tensor<?x?xf32>
+      %w21 = vector.transfer_write %u21, %w20[%c0, %c3] {in_bounds=[true]} : vector<3xf32>, tensor<?x?xf32>
+      %w30 = vector.transfer_write %u30, %arg7[%c0, %random_index] {in_bounds=[true]} : vector<4xf32>, tensor<?x?xf32>
+      %w31 = vector.transfer_write %u31, %w30[%c1, %random_index] {in_bounds=[true]} : vector<4xf32>, tensor<?x?xf32>
+      %w00 = vector.transfer_write %u10, %arg4[%i, %i] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
+      %w01 = vector.transfer_write %u11, %w00[%random_index, %random_index] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
       scf.yield %w01, %w11, %w21, %w31 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
     }
     scf.yield %1#0,  %1#1, %1#2, %1#3 : tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
@@ -492,19 +492,19 @@ func.func @hoist_vector_transfer_pairs_tensor_and_slices(
     -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)  {
       // Hoists.
       %st0 = tensor.extract_slice %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-      %r0 = vector.transfer_read %st0[%c0, %c0], %cst: tensor<?x?xf32>, vector<1xf32>
+      %r0 = vector.transfer_read %st0[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<1xf32>
 
       // CHECK:     %[[ST1:.*]] = tensor.extract_slice %[[TENSOR1_ARG_L2]][%[[J]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
       // CHECK:     %[[V1:.*]] = vector.transfer_read %[[ST1]]{{.*}} : tensor<?x?xf32>, vector<2xf32>
       // Does not hoist (slice depends on %j)
       %st1 = tensor.extract_slice %arg7[%j, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-      %r1 = vector.transfer_read %st1[%c0, %c0], %cst: tensor<?x?xf32>, vector<2xf32>
+      %r1 = vector.transfer_read %st1[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
 
       // CHECK:     %[[ST2:.*]] = tensor.extract_slice %[[TENSOR2_ARG_L2]][%[[I]],{{.*}}: tensor<?x?xf32> to tensor<?x?xf32>
       // CHECK:     %[[V2:.*]] = vector.transfer_read %[[ST2]]{{.*}} : tensor<?x?xf32>, vector<3xf32>
       // Does not hoist, 2 slice %arg8.
       %st2 = tensor.extract_slice %arg8[%i, %c0][%step, %step][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-      %r2 = vector.transfer_read %st2[%c0, %c0], %cst: tensor<?x?xf32>, vector<3xf32>
+      %r2 = vector.transfer_read %st2[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<3xf32>
 
       // CHECK:     %[[U0:.*]] = "test.some_use"(%[[V0_ARG_L2]]) : (vector<1xf32>) -> vector<1xf32>
       // CHECK:     %[[U1:.*]] = "test.some_use"(%[[V1]]) : (vector<2xf32>) -> vector<2xf32>
@@ -514,15 +514,15 @@ func.func @hoist_vector_transfer_pairs_tensor_and_slices(
       %u2 = "test.some_use"(%r2) : (vector<3xf32>) -> vector<3xf32>
 
       // Hoists
-      %w0 = vector.transfer_write %u0, %st0[%c0, %c0] : vector<1xf32>, tensor<?x?xf32>
+      %w0 = vector.transfer_write %u0, %st0[%c0, %c0] {in_bounds=[true]} : vector<1xf32>, tensor<?x?xf32>
 
       // CHECK-DAG:     %[[STI1:.*]] = vector.transfer_write %[[U1]], %{{.*}} : vector<2xf32>, tensor<?x?xf32>
       // Does not hoist (associated slice depends on %j).
-      %w1 = vector.transfer_write %u1, %st1[%i, %i] : vector<2xf32>, tensor<?x?xf32>
+      %w1 = vector.transfer_write %u1, %st1[%i, %i] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
 
       // CHECK-DAG:     %[[STI2:.*]] = vector.transfer_write %[[U2]], %{{.*}} : vector<3xf32>, tensor<?x?xf32>
       // Does not hoist, 2 slice / insert_slice for %arg8.
-      %w2 = vector.transfer_write %u2, %st2[%c0, %c0] : vector<3xf32>, tensor<?x?xf32>
+      %w2 = vector.transfer_write %u2, %st2[%c0, %c0] {in_bounds=[true]} : vector<3xf32>, tensor<?x?xf32>
 
       // Hoists.
       %sti0 = tensor.insert_slice %w0 into %arg6[%i, %i][%step, %step][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
@@ -570,8 +570,8 @@ func.func @hoist_vector_transfer_pairs_tensor_and_slices(
 //       CHECK:     %[[R5:.*]] = "test.some_use"(%[[R3]]) : (vector<2xf32>) -> vector<2xf32>
 //       CHECK:     scf.yield %[[TL]], %[[R4]], %[[R5]] : tensor<?x?xf32>, vector<2xf32>, vector<2xf32>
 //       CHECK:   }
-//       CHECK:   %[[W0:.*]] = vector.transfer_write %[[F]]#2, %[[F]]#0[%[[C0]], %[[C3]]] : vector<2xf32>, tensor<?x?xf32>
-//       CHECK:   %[[W1:.*]] = vector.transfer_write %[[F]]#1, %[[W0]][%[[C0]], %[[C0]]] : vector<2xf32>, tensor<?x?xf32>
+//       CHECK:   %[[W0:.*]] = vector.transfer_write %[[F]]#2, %[[F]]#0[%[[C0]], %[[C3]]] {{.*}} : vector<2xf32>, tensor<?x?xf32>
+//       CHECK:   %[[W1:.*]] = vector.transfer_write %[[F]]#1, %[[W0]][%[[C0]], %[[C0]]] {{.*}} : vector<2xf32>, tensor<?x?xf32>
 //       CHECK:  return %[[W1]] : tensor<?x?xf32>
 func.func @hoist_vector_transfer_write_pairs_disjoint_tensor(
     %tensor: tensor<?x?xf32>,
@@ -583,14 +583,14 @@ func.func @hoist_vector_transfer_write_pairs_disjoint_tensor(
   %cst = arith.constant 0.0 : f32
   %1 = scf.for %j = %lb to %ub step %step iter_args(%arg5 = %tensor)
     -> (tensor<?x?xf32>) {
-    %r00 = vector.transfer_read %arg5[%c0, %c0], %cst: tensor<?x?xf32>, vector<2xf32>
+    %r00 = vector.transfer_read %arg5[%c0, %c0], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
     %u00 = "test.some_use"(%r00) : (vector<2xf32>) -> vector<2xf32>
-    %w10 = vector.transfer_write %u00, %arg5[%c0, %c0] : vector<2xf32>, tensor<?x?xf32>
+    %w10 = vector.transfer_write %u00, %arg5[%c0, %c0] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
 
     // Hoist by properly bypassing the disjoint write %w10.
-    %r01 = vector.transfer_read %w10[%c0, %c3], %cst: tensor<?x?xf32>, vector<2xf32>
+    %r01 = vector.transfer_read %w10[%c0, %c3], %cst {in_bounds=[true]}: tensor<?x?xf32>, vector<2xf32>
     %u01 = "test.some_use"(%r01) : (vector<2xf32>) -> vector<2xf32>
-    %w11 = vector.transfer_write %u01, %w10[%c0, %c3] : vector<2xf32>, tensor<?x?xf32>
+    %w11 = vector.transfer_write %u01, %w10[%c0, %c3] {in_bounds=[true]} : vector<2xf32>, tensor<?x?xf32>
     scf.yield %w11 : tensor<?x?xf32>
   }
   return %1 : tensor<?x?xf32>