[Mlir-commits] [mlir] [MLIR][Vector] Make transfer in_bounds index-aware (PR #193920)

Fri Apr 24 01:30:18 PDT 2026

https://github.com/zackc6 created https://github.com/llvm/llvm-project/pull/193920

Compute transfer in_bounds flags using constant indices and static base shapes instead of shape-only checks. This avoids marking dimensions in-bounds when offset accesses can exceed the underlying source or destination.

From e0dc7991623856f1ce14b4d77456685e719be22a Mon Sep 17 00:00:00 2001
From: zack <zackchen666 at gmail.com>
Date: Fri, 24 Apr 2026 14:53:38 +0800
Subject: [PATCH] [MLIR][Vector] Make transfer in_bounds index-aware

Compute transfer in_bounds flags using constant indices and static base shapes instead of shape-only checks. This avoids marking dimensions in-bounds when offset accesses can exceed the underlying source or destination.
---
 mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp | 99 +++++++++++++++----
 .../insert-slice-with-patterns.mlir           | 97 ++++++++++++++++++
 2 files changed, 179 insertions(+), 17 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index 576023dbc9de1..06d20854cc888 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -406,6 +406,78 @@ static bool isMaskTriviallyFoldable(SmallVector<OpFoldResult> &maskSizes,
   return true;
 }
 
+/// Returns `in_bounds` values derived from static shapes and constant indices.
+/// Conservatively returns false for any dimension that cannot be proven safe.
+///
+/// `baseDims` and `alignedIndices` must be aligned with `vectorShape`:
+///   * `baseDims[vecDim]` is the base tensor dim checked for `vecDim`.
+///   * `alignedIndices[vecDim]` is the index used for that mapped base dim.
+static SmallVector<bool> computeInBoundsFromStaticShapeAndIndices(
+    ArrayRef<int64_t> baseShape, ArrayRef<int64_t> vectorShape,
+    ArrayRef<int64_t> baseDims, ArrayRef<Value> alignedIndices) {
+  SmallVector<bool> inBounds(vectorShape.size(), false);
+  if (baseDims.size() != vectorShape.size() ||
+      alignedIndices.size() != vectorShape.size())
+    return inBounds;
+
+  for (auto [vecDim, vecSize] : llvm::enumerate(vectorShape)) {
+    int64_t baseDim = baseDims[vecDim];
+    Value index = alignedIndices[vecDim];
+    if (!ShapedType::isStatic(baseShape[baseDim]) || vecSize < 0)
+      continue;
+    APSInt indexValue;
+    if (!matchPattern(index, m_ConstantInt(&indexValue)))
+      continue;
+    int64_t idx = indexValue.getSExtValue();
+    // Indexes must be non-negative and stay in range for the whole vector dim.
+    inBounds[vecDim] = idx >= 0 && idx + vecSize <= baseShape[baseDim];
+  }
+  return inBounds;
+}
+
+/// Compute in_bounds for transfer_read where source and vector ranks match.
+static SmallVector<bool>
+computeReadInBoundsFromStaticShape(ArrayRef<int64_t> sourceShape,
+                                   ArrayRef<int64_t> vectorShape,
+                                   ArrayRef<Value> readIndices) {
+  if (sourceShape.size() != vectorShape.size() ||
+      readIndices.size() != vectorShape.size())
+    return SmallVector<bool>(vectorShape.size(), false);
+
+  SmallVector<int64_t> sourceDims;
+  sourceDims.reserve(vectorShape.size());
+  for (auto [dim, _] : llvm::enumerate(vectorShape))
+    sourceDims.push_back(dim);
+
+  return computeInBoundsFromStaticShapeAndIndices(sourceShape, vectorShape,
+                                                  sourceDims, readIndices);
+}
+
+/// Compute in_bounds for transfer_write where write indices are expressed in
+/// destination-rank coordinates.
+static SmallVector<bool>
+computeWriteInBoundsFromStaticShape(ArrayRef<int64_t> destShape,
+                                    ArrayRef<int64_t> vectorShape,
+                                    ArrayRef<Value> writeIndices) {
+  if (writeIndices.size() != destShape.size() ||
+      destShape.size() < vectorShape.size())
+    return SmallVector<bool>(vectorShape.size(), false);
+
+  int64_t rankDiff = destShape.size() - vectorShape.size();
+  SmallVector<int64_t> destDims;
+  SmallVector<Value> alignedIndices;
+  destDims.reserve(vectorShape.size());
+  alignedIndices.reserve(vectorShape.size());
+  for (auto [vecDim, _] : llvm::enumerate(vectorShape)) {
+    int64_t destDim = rankDiff + vecDim;
+    destDims.push_back(destDim);
+    alignedIndices.push_back(writeIndices[destDim]);
+  }
+
+  return computeInBoundsFromStaticShapeAndIndices(destShape, vectorShape,
+                                                  destDims, alignedIndices);
+}
+
 Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
                                      Value source,
                                      ArrayRef<int64_t> inputVectorSizes,
@@ -441,16 +513,13 @@ Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
          "expected same pad element type to match source element type");
 
   auto zero = arith::ConstantIndexOp::create(builder, loc, 0);
+  SmallVector<Value> indices(vecToReadRank, zero);
   SmallVector<bool> inBoundsVal(vecToReadRank, true);
 
   if (useInBoundsInsteadOfMasking) {
-    // Update the inBounds attribute.
-    // FIXME: This computation is too weak - it ignores the read indices.
-    for (unsigned i = 0; i < vecToReadRank; i++)
-      inBoundsVal[i] = (sourceShape[i] == vecToReadShape[i]) &&
-                       ShapedType::isStatic(sourceShape[i]);
+    inBoundsVal = computeReadInBoundsFromStaticShape(sourceShape,
+                                                     vecToReadShape, indices);
   }
-  SmallVector<Value> indices(vecToReadRank, zero);
   auto transferReadOp =
       vector::TransferReadOp::create(builder, loc,
                                      /*vectorType=*/vecToReadTy,
@@ -491,17 +560,6 @@ Operation *vector::createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
   int64_t vecToStoreRank = vecToStoreType.getRank();
   auto vecToStoreShape = vecToStoreType.getShape();
 
-  // Compute the in_bounds attribute
-  SmallVector<bool> inBoundsVal(vecToStoreRank, true);
-  if (useInBoundsInsteadOfMasking) {
-    // Update the inBounds attribute.
-    // FIXME: This computation is too weak - it ignores the write indices.
-    for (unsigned i = 0; i < vecToStoreRank; i++)
-      inBoundsVal[i] =
-          (destShape[destRank - vecToStoreRank + i] >= vecToStoreShape[i]) &&
-          ShapedType::isStatic(destShape[destRank - vecToStoreRank + i]);
-  }
-
   // If missing, initialize the write indices to 0.
   bool useDefaultWriteIdxs = writeIndices.empty();
   assert((useDefaultWriteIdxs ||
@@ -512,6 +570,13 @@ Operation *vector::createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
     writeIndices.assign(destRank, zero);
   }
 
+  // Compute the in_bounds attribute.
+  SmallVector<bool> inBoundsVal(vecToStoreRank, true);
+  if (useInBoundsInsteadOfMasking) {
+    inBoundsVal = computeWriteInBoundsFromStaticShape(
+        destShape, vecToStoreShape, writeIndices);
+  }
+
   // Generate the xfer_write Op
   Operation *write = vector::TransferWriteOp::create(builder, loc,
                                                      /*vector=*/vecToStore,
diff --git a/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir
index f7764be9be73f..c83856a7c99de 100644
--- a/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir
@@ -59,6 +59,103 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// All source dimensions are dynamic, so read in_bounds cannot be proven for
+// any vector dimension. The transfer_read should therefore print without an
+// in_bounds attribute (all-false canonical form).
+//
+// CHECK-LABEL:   func.func @insert_dynamic_slice_all_false_read_in_bounds(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<?x?x?xf32>,
+// CHECK-SAME:      %[[PAD:.*]]: f32, %[[SZ0:.*]]: index, %[[SZ1:.*]]: index, %[[SZ2:.*]]: index) -> tensor<9x8x7x1x2x3xf32> {
+// CHECK:           %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32>
+// CHECK:           %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] : tensor<?x?x?xf32>, vector<1x2x3xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           return %[[RES]] : tensor<9x8x7x1x2x3xf32>
+func.func @insert_dynamic_slice_all_false_read_in_bounds(
+    %arg0: tensor<?x?x?xf32>, %pad : f32,
+    %sz0: index, %sz1: index, %sz2: index) -> tensor<9x8x7x1x2x3xf32> {
+  %init = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+  %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32>
+  %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, %sz0, %sz1, %sz2][1, 1, 1, 1, 1, 1] : tensor<?x?x?xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// All write offsets in the vectorized dimensions are dynamic, so write
+// in_bounds cannot be proven for any vector dimension. The transfer_write
+// should print without an in_bounds attribute (all-false canonical form).
+//
+// CHECK-LABEL:   func.func @insert_static_slice_all_false_write_in_bounds(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x2x3xf32>,
+// CHECK-SAME:      %[[PAD:.*]]: f32, %[[OFF0:.*]]: index, %[[OFF1:.*]]: index, %[[OFF2:.*]]: index) -> tensor<9x8x7x1x2x3xf32> {
+// CHECK:           %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32>
+// CHECK:           %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           return %[[RES]] : tensor<9x8x7x1x2x3xf32>
+func.func @insert_static_slice_all_false_write_in_bounds(
+    %arg0: tensor<1x2x3xf32>, %pad : f32,
+    %off0: index, %off1: index, %off2: index) -> tensor<9x8x7x1x2x3xf32> {
+  %init = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+  %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32>
+  %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, %off0, %off1, %off2] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// Non-zero insert offsets should affect transfer_write in_bounds. Since the
+// source shape is dynamic, transfer_read also keeps a conservative in_bounds.
+//
+// CHECK-LABEL:   func.func @insert_dynamic_slice_non_zero_offset(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x?x3xf32>,
+// CHECK-SAME:      %[[PAD:.*]]: f32,
+// CHECK-SAME:      %[[SIZE:.*]]: index) -> tensor<9x8x7x1x2x3xf32> {
+// CHECK:           %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32>
+// CHECK:           %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, false, true]} : tensor<1x?x3xf32>, vector<1x2x3xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, false, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           return %[[RES]] : tensor<9x8x7x1x2x3xf32>
+func.func @insert_dynamic_slice_non_zero_offset(%arg0: tensor<1x?x3xf32>, %pad : f32, %size: index) -> tensor<9x8x7x1x2x3xf32> {
+  %init = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+  %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32>
+  %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 1, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // Same as above, but the source type has is dynamically shaped. This means
 // that the pad value is now required and the vector dim corresponding to the
 // dynamic shape has to be inferred from the shape of the destination tensor.