[Mlir-commits] [mlir] [MLIR][Vector] Fix vector.create_mask i32 overflow for large index values (PR #188782)

Thu Mar 26 09:05:03 PDT 2026

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Mehdi Amini (joker-eph)

<details>
<summary>Changes</summary>

When lowering `vector.create_mask` for 1-D fixed-size vectors with `force-32bit-vector-indices=true`, the mask bound (an `index`-typed value) was cast directly to `i32`. For index values larger than INT32_MAX (e.g., 2^51), this truncation wraps to a small or negative i32 value, making all vector comparison results false — every mask element becomes 0 even when the bound is larger than the vector dimension.

Fix: in `buildVectorComparison`, clamp the bound to `[_, dim]` using `arith.minsi` in index type before the `i32` cast. If the bound is >= dim, all elements should be true regardless; clamping to `dim` (which fits in i32 since it is a compile-time vector dimension size) preserves that semantic without overflow.

The fix applies to both `vector.create_mask` lowering and `vector.transfer_read/write` out-of-bounds mask generation, both of which call `buildVectorComparison`.

Fixes #113689

Assisted-by: Claude Code

---
Full diff: https://github.com/llvm/llvm-project/pull/188782.diff


4 Files Affected:

- (modified) mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp (+9) 
- (modified) mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir (+3-1) 
- (modified) mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir (+3-1) 
- (modified) mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir (+6-7) 


``````````diff

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index c694f4f58faa1..90a6a4f2a9f32 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1361,6 +1361,15 @@ static Value buildVectorComparison(PatternRewriter &rewriter, Operation *op,
     indices = arith::AddIOp::create(rewriter, loc, ov, indices);
   }
   // Construct the vector comparison.
+  // When using 32-bit indices, clamp `b` to [INT_MIN, dim] in index type
+  // before casting. This prevents signed overflow for large index values: if
+  // `b >= dim`, all elements are set, so clamping to `dim` is semantically
+  // equivalent and avoids truncation artifacts (e.g., 2^51 wrapping to 0 in
+  // i32).
+  if (force32BitVectorIndices && dim > 0) {
+    Value dimCst = arith::ConstantIndexOp::create(rewriter, loc, dim);
+    b = arith::MinSIOp::create(rewriter, loc, b, dimCst);
+  }
   Value bound = getValueOrCreateCastToIndexLike(rewriter, loc, idxType, b);
   Value bounds =
       vector::BroadcastOp::create(rewriter, loc, indices.getType(), bound);
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
index 91e5358622b69..86e906e6ce1b7 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
@@ -4,7 +4,9 @@
 // CMP32-LABEL: @genbool_var_1d(
 // CMP32-SAME: %[[ARG:.*]]: index)
 // CMP32: %[[T0:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>
-// CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32
+// CMP32: %[[DIM:.*]] = arith.constant 11 : index
+// CMP32: %[[CLAMPED:.*]] = arith.minsi %[[ARG]], %[[DIM]] : index
+// CMP32: %[[T1:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
 // CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32>
 // CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32>
 // CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32>
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index d669a3bac3336..df83c2989bec7 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -1560,7 +1560,9 @@ func.func @create_mask_1d(%num_elems : index) -> vector<4xi1> {
 // CHECK-LABEL: func @create_mask_1d
 // CHECK-SAME: %[[NUM_ELEMS:.*]]: index
 // CHECK:  %[[INDICES:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
-// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
+// CHECK:  %[[DIM:.*]] = arith.constant 4 : index
+// CHECK:  %[[CLAMPED:.*]] = arith.minsi %[[NUM_ELEMS]], %[[DIM]] : index
+// CHECK:  %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
 // CHECK:  %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
 // CHECK:  %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]]
 // CHECK:  %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS]], %[[INDICES]] : vector<4xi32>
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir
index d3f6d7eca90b4..15be692f92962 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir
@@ -28,13 +28,12 @@ func.func @transfer_read_write_1d(%A : memref<?xf32>, %base: index) -> vector<17
 //
 // 4. Create bound vector to compute in-bound mask:
 //    [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-//       CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] :
-//  CMP32-SAME: index to i32
-//  CMP64-SAME: index to i64
+//    Note: for 32-bit indices, the bound is first clamped via arith.minsi to
+//    prevent i32 overflow for large index values.
+//       CHECK: %[[btrunc:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
 //       CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
 //       CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
 //       CHECK: %[[mask:.*]] = arith.cmpi sgt, %[[boundVect]], %[[linearIndex]] : vector<17x[[$IDX_TYPE]]>
-//  CMP64-SAME: : vector<17xi64>
 //
 // 5. Bitcast to vector form.
 //       CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}} :
@@ -51,8 +50,7 @@ func.func @transfer_read_write_1d(%A : memref<?xf32>, %base: index) -> vector<17
 //
 // 2. Create bound vector to compute in-bound mask:
 //    [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-//       CHECK: %[[btrunc_b:.*]] = arith.index_cast %[[BOUND_b]]
-//  CMP32-SAME: index to i32
+//       CHECK: %[[btrunc_b:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
 //       CHECK: %[[boundVecInsert_b:.*]] = llvm.insertelement %[[btrunc_b]]
 //       CHECK: %[[boundVect_b:.*]] = llvm.shufflevector %[[boundVecInsert_b]]
 //       CHECK: %[[mask_b:.*]] = arith.cmpi sgt, %[[boundVect_b]],
@@ -200,7 +198,8 @@ func.func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: i
 //
 // Create bound vector to compute in-bound mask:
 //    [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-//       CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] : index to [[$IDX_TYPE]]
+//    Note: for 32-bit indices, the bound is first clamped via arith.minsi.
+//       CHECK: %[[btrunc:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
 //       CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
 //       CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
 //       CHECK: %[[mask:.*]] = arith.cmpi sgt, %[[boundVect]], %[[linearIndex]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/188782