[Mlir-commits] [mlir] [MLIR][Vector] Fix i32 overflow in vector mask lowering for large index bounds (PR #188782)
Mehdi Amini
llvmlistbot at llvm.org
Thu Apr 16 03:57:39 PDT 2026
https://github.com/joker-eph updated https://github.com/llvm/llvm-project/pull/188782
>From 1d1b0f1e4def486959405b92587df11068308344 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Thu, 26 Mar 2026 07:57:44 -0700
Subject: [PATCH] [MLIR][Vector] Fix i32 overflow in vector mask lowering for
large index bounds
When lowering vector masks with `force-32bit-vector-indices=true`, the
mask bound (an `index`-typed value) was cast directly to `i32`. For values
larger than INT32_MAX (e.g., 2^51), the truncating cast wraps to a small
or negative i32, turning an all-true mask into an all-false one.
Fix both the fixed-size vector path (`buildVectorComparison` in
VectorTransforms.cpp) and the scalable vector path
(`VectorCreateMaskOpConversion` in ConvertVectorToLLVM.cpp) uniformly:
clamp the bound to INT32_MAX via `arith.minsi` in index type before the
cast. For fixed-size vectors `dim` would be a tighter bound (any b >= dim
already implies all-true), but INT32_MAX is used for consistency across
both paths.
Add a LIT regression test with a 2^51 bound and an integration test that
executes the lowered code to confirm the mask is all-true at runtime.
Fixes #113689
Assisted-by: Claude Code
---
.../VectorToLLVM/ConvertVectorToLLVM.cpp | 16 ++++++++++++--
.../Vector/Transforms/VectorTransforms.cpp | 10 +++++++++
.../VectorToLLVM/vector-mask-to-llvm.mlir | 8 +++++--
.../VectorToLLVM/vector-to-llvm.mlir | 12 +++++++---
.../VectorToLLVM/vector-xfer-to-llvm.mlir | 22 ++++++++++---------
.../Dialect/Vector/CPU/create-mask.mlir | 16 ++++++++++++++
6 files changed, 67 insertions(+), 17 deletions(-)
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 49f1815a8d941..43e0824fef6cd 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -1510,8 +1510,20 @@ class VectorCreateMaskOpConversion
rewriter, loc,
LLVM::getVectorType(idxType, dstType.getShape()[0],
/*isScalable=*/true));
- auto bound = getValueOrCreateCastToIndexLike(rewriter, loc, idxType,
- adaptor.getOperands()[0]);
+ Value maskBound = adaptor.getOperands()[0];
+ // When using 32-bit indices, cap the bound at INT32_MAX in index type
+ // before casting. For scalable vectors the runtime size (vscale * dim) is
+ // unknown at compile time, so the tighter bound `dim` (valid for fixed-size
+ // vectors) cannot be used. Clamping to INT32_MAX is safe because any
+ // realistic scalable vector size fits well below this limit, so a bound
+ // >= vscale*dim still produces an all-true mask after the comparison.
+ if (force32BitVectorIndices) {
+ Value maxBound =
+ arith::ConstantIndexOp::create(rewriter, loc, (1LL << 31) - 1);
+ maskBound = arith::MinSIOp::create(rewriter, loc, maskBound, maxBound);
+ }
+ auto bound =
+ getValueOrCreateCastToIndexLike(rewriter, loc, idxType, maskBound);
Value bounds = BroadcastOp::create(rewriter, loc, indices.getType(), bound);
Value comp = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::slt,
indices, bounds);
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index a2d59010a2901..752610efc6992 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1361,6 +1361,16 @@ static Value buildVectorComparison(PatternRewriter &rewriter, Operation *op,
indices = arith::AddIOp::create(rewriter, loc, ov, indices);
}
// Construct the vector comparison.
+ // When using 32-bit indices, cap `b` at INT32_MAX before casting so that the
+ // truncating cast cannot wrap large index values (e.g., 2^51 becoming 0 in
+ // i32). Note: for fixed-size vectors, `dim` is a tighter bound (since any
+ // b >= dim already implies all-true), but we use INT32_MAX for uniformity
+ // with the scalable-vector path.
+ if (force32BitVectorIndices) {
+ Value maxBound =
+ arith::ConstantIndexOp::create(rewriter, loc, (1LL << 31) - 1);
+ b = arith::MinSIOp::create(rewriter, loc, b, maxBound);
+ }
Value bound = getValueOrCreateCastToIndexLike(rewriter, loc, idxType, b);
Value bounds =
vector::BroadcastOp::create(rewriter, loc, indices.getType(), bound);
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
index 91e5358622b69..5e3929931582a 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-mask-to-llvm.mlir
@@ -4,7 +4,9 @@
// CMP32-LABEL: @genbool_var_1d(
// CMP32-SAME: %[[ARG:.*]]: index)
// CMP32: %[[T0:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : vector<11xi32>
-// CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32
+// CMP32: %[[MAX:.*]] = arith.constant 2147483647 : index
+// CMP32: %[[CLAMPED:.*]] = arith.minsi %[[ARG]], %[[MAX]] : index
+// CMP32: %[[T1:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
// CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<11xi32>
// CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<11xi32>
// CMP32: %[[T4:.*]] = arith.cmpi sgt, %[[T3]], %[[T0]] : vector<11xi32>
@@ -27,7 +29,9 @@ func.func @genbool_var_1d(%arg0: index) -> vector<11xi1> {
// CMP32-LABEL: @genbool_var_1d_scalable(
// CMP32-SAME: %[[ARG:.*]]: index)
// CMP32: %[[T0:.*]] = llvm.intr.stepvector : vector<[11]xi32>
-// CMP32: %[[T1:.*]] = arith.index_cast %[[ARG]] : index to i32
+// CMP32: %[[MAX:.*]] = arith.constant 2147483647 : index
+// CMP32: %[[CLAMPED:.*]] = arith.minsi %[[ARG]], %[[MAX]] : index
+// CMP32: %[[T1:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
// CMP32: %[[T2:.*]] = llvm.insertelement %[[T1]], %{{.*}}[%{{.*}} : i32] : vector<[11]xi32>
// CMP32: %[[T3:.*]] = llvm.shufflevector %[[T2]], %{{.*}} [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] : vector<[11]xi32>
// CMP32: %[[T4:.*]] = arith.cmpi slt, %[[T0]], %[[T3]] : vector<[11]xi32>
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index d669a3bac3336..77f60b3172296 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -1544,7 +1544,9 @@ func.func @create_mask_0d(%num_elems : index) -> vector<i1> {
// CHECK-LABEL: func @create_mask_0d
// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
// CHECK: %[[INDICES:.*]] = arith.constant dense<0> : vector<i32>
-// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
+// CHECK: %[[MAX:.*]] = arith.constant 2147483647 : index
+// CHECK: %[[CLAMPED:.*]] = arith.minsi %[[NUM_ELEMS]], %[[MAX]] : index
+// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
// CHECK: %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
// CHECK: %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector<i32>
// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector<i32>
@@ -1560,7 +1562,9 @@ func.func @create_mask_1d(%num_elems : index) -> vector<4xi1> {
// CHECK-LABEL: func @create_mask_1d
// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
// CHECK: %[[INDICES:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
-// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
+// CHECK: %[[MAX:.*]] = arith.constant 2147483647 : index
+// CHECK: %[[CLAMPED:.*]] = arith.minsi %[[NUM_ELEMS]], %[[MAX]] : index
+// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
// CHECK: %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
// CHECK: %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]]
// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS]], %[[INDICES]] : vector<4xi32>
@@ -1576,7 +1580,9 @@ func.func @create_mask_1d_scalable(%num_elems : index) -> vector<[4]xi1> {
// CHECK-LABEL: func @create_mask_1d_scalable
// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
// CHECK: %[[INDICES:.*]] = llvm.intr.stepvector : vector<[4]xi32>
-// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
+// CHECK: %[[MAX:.*]] = arith.constant 2147483647 : index
+// CHECK: %[[CLAMPED:.*]] = arith.minsi %[[NUM_ELEMS]], %[[MAX]] : index
+// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[CLAMPED]] : index to i32
// CHECK: %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]], {{.*}} : vector<[4]xi32>
// CHECK: %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]], {{.*}} : vector<[4]xi32>
// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS]] : vector<[4]xi32>
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir
index d3f6d7eca90b4..18deadd0d7a79 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-xfer-to-llvm.mlir
@@ -28,13 +28,12 @@ func.func @transfer_read_write_1d(%A : memref<?xf32>, %base: index) -> vector<17
//
// 4. Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-// CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] :
-// CMP32-SAME: index to i32
-// CMP64-SAME: index to i64
+// Note: for 32-bit indices, the bound is first clamped via arith.minsi to
+// prevent i32 overflow for large index values.
+// CHECK: %[[btrunc:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
// CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi sgt, %[[boundVect]], %[[linearIndex]] : vector<17x[[$IDX_TYPE]]>
-// CMP64-SAME: : vector<17xi64>
//
// 5. Bitcast to vector form.
// CHECK: %[[gep:.*]] = llvm.getelementptr %{{.*}} :
@@ -51,8 +50,7 @@ func.func @transfer_read_write_1d(%A : memref<?xf32>, %base: index) -> vector<17
//
// 2. Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-// CHECK: %[[btrunc_b:.*]] = arith.index_cast %[[BOUND_b]]
-// CMP32-SAME: index to i32
+// CHECK: %[[btrunc_b:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
// CHECK: %[[boundVecInsert_b:.*]] = llvm.insertelement %[[btrunc_b]]
// CHECK: %[[boundVect_b:.*]] = llvm.shufflevector %[[boundVecInsert_b]]
// CHECK: %[[mask_b:.*]] = arith.cmpi sgt, %[[boundVect_b]],
@@ -93,7 +91,9 @@ func.func @transfer_read_write_1d_scalable(%A : memref<?xf32>, %base: index) ->
//
// 4. Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-// CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] : index to [[$IDX_TYPE]]
+// Note: for 32-bit indices, the bound is first clamped via arith.minsi to
+// prevent i32 overflow for large index values.
+// CHECK: %[[btrunc:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
// CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi slt, %[[linearIndex]], %[[boundVect]]
@@ -117,7 +117,7 @@ func.func @transfer_read_write_1d_scalable(%A : memref<?xf32>, %base: index) ->
//
// 3. Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-// CHECK: %[[btrunc_b:.*]] = arith.index_cast %[[BOUND_b]] : index to [[$IDX_TYPE]]
+// CHECK: %[[btrunc_b:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
// CHECK: %[[boundVecInsert_b:.*]] = llvm.insertelement %[[btrunc_b]]
// CHECK: %[[boundVect_b:.*]] = llvm.shufflevector %[[boundVecInsert_b]]
// CHECK: %[[mask_b:.*]] = arith.cmpi slt, %[[linearIndex_b]],
@@ -200,7 +200,8 @@ func.func @transfer_read_2d_to_1d(%A : memref<?x?xf32>, %base0: index, %base1: i
//
// Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-// CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] : index to [[$IDX_TYPE]]
+// Note: for 32-bit indices, the bound is first clamped via arith.minsi.
+// CHECK: %[[btrunc:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
// CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi sgt, %[[boundVect]], %[[linearIndex]]
@@ -225,7 +226,8 @@ func.func @transfer_read_2d_to_1d_scalable(%A : memref<?x?xf32>, %base0: index,
//
// Create bound vector to compute in-bound mask:
// [ 0 .. vector_length - 1 ] < [ dim - offset .. dim - offset ]
-// CHECK: %[[btrunc:.*]] = arith.index_cast %[[BOUND]] : index to [[$IDX_TYPE]]
+// Note: for 32-bit indices, the bound is first clamped via arith.minsi.
+// CHECK: %[[btrunc:.*]] = arith.index_cast %{{.*}} : index to [[$IDX_TYPE]]
// CHECK: %[[boundVecInsert:.*]] = llvm.insertelement %[[btrunc]]
// CHECK: %[[boundVect:.*]] = llvm.shufflevector %[[boundVecInsert]]
// CHECK: %[[mask:.*]] = arith.cmpi slt, %[[linearIndex]], %[[boundVect]]
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/create-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/create-mask.mlir
index e5d6fa5bbaf0c..6ef9ff32fc52d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/create-mask.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/create-mask.mlir
@@ -33,6 +33,15 @@ func.func @entry() {
// CHECK: ( 1, 1, 1, 1, 1 )
// CHECK: ( 1, 1, 1, 1, 1 )
+ // Verify that bounds larger than INT32_MAX produce an all-true mask.
+ // With force-32bit-vector-indices (the default), a naive truncating cast of
+ // 2^51 to i32 wraps to 0 and produces an all-false mask; the fix clamps the
+ // bound to INT32_MAX before casting.
+ %large = arith.constant 2251799813685248 : index // 2^51
+ %9 = func.call @create_mask_large_bound(%large) : (index) -> vector<5xi1>
+ vector.print %9 : vector<5xi1>
+ // CHECK: ( 1, 1, 1, 1, 1 )
+
//
// 2-D.
//
@@ -112,3 +121,10 @@ func.func @entry() {
return
}
+
+// Helper for the large-bound overflow regression test: takes a runtime index
+// so that vector.create_mask cannot be constant-folded before lowering.
+func.func @create_mask_large_bound(%n : index) -> vector<5xi1> {
+ %0 = vector.create_mask %n : vector<5xi1>
+ return %0 : vector<5xi1>
+}
More information about the Mlir-commits
mailing list