[Mlir-commits] [mlir] [mlir][Vector] Fix mask lowering in_bounds and all-true mask elimination (PR #189477)
Benoit Jacob
llvmlistbot at llvm.org
Mon Mar 30 13:53:06 PDT 2026
https://github.com/bjacob updated https://github.com/llvm/llvm-project/pull/189477
>From 578f8ea72e3a42e994ea77699b52a9c2b7a48416 Mon Sep 17 00:00:00 2001
From: Benoit Jacob <benoit.jacob at amd.com>
Date: Mon, 30 Mar 2026 17:12:24 +0000
Subject: [PATCH] [mlir][Vector] Fix mask lowering in_bounds and all-true mask
elimination
When lowering vector.mask around transfer read/write, set in_bounds to
false on the new transfer instead of preserving the region body's attribute.
After the mask becomes a transfer operand, keeping in_bounds=true was
misleading for inactive lanes (padding / OOB semantics) and could combine
unsoundly with foldTransferFullMask + in_bounds.
When folding create_mask to all-true for unknown dimensions, require lower
and upper scalable bounds to agree before treating the dimension as
constant-sized. Using only a lower bound was unsound when the extent can
vary at runtime (e.g. dynamic slice full on most iterations but partial on
the last).
Signed-off-by: Benoit Jacob <benoit.jacob at amd.com>
---
.../Vector/Transforms/LowerVectorMask.cpp | 20 +++++++++--
.../Transforms/VectorMaskElimination.cpp | 36 +++++++++++++++----
...compose-masked-vectorize-and-cleanups.mlir | 8 ++---
mlir/test/Dialect/Vector/eliminate-masks.mlir | 20 +++++++++++
.../Dialect/Vector/lower-vector-mask.mlir | 30 +++++++++++++++-
.../vector-mask-lowering-transforms.mlir | 2 +-
6 files changed, 101 insertions(+), 15 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp
index 7730c4e7c950a..818d95000ae42 100644
--- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp
@@ -221,11 +221,22 @@ struct MaskedTransferReadOpPattern
return rewriter.notifyMatchFailure(
maskingOp, "Can't lower passthru to vector.transfer_read");
+ // The nested transfer often has in_bounds=true because vectorization set
+ // it when the read lived under vector.mask. After peeling the mask onto
+ // the transfer_read's mask operand, keeping in_bounds=true is misleading:
+ // masked-out lanes rely on padding for out-of-bounds semantics, and
+ // foldTransferFullMask + in_bounds can collapse to an unmasked read that
+ // assumes a full in-bounds super-vector load. Use out-of-bounds defaults
+ // here; canonicalization can still promote dims later when provably safe
+ // after the mask is folded away.
+ auto inBoundsAttr = rewriter.getBoolArrayAttr(
+ SmallVector<bool>(readOp.getVectorType().getRank(), false));
+
// Replace the `vector.mask` operation.
rewriter.replaceOpWithNewOp<TransferReadOp>(
maskingOp.getOperation(), readOp.getVectorType(), readOp.getBase(),
readOp.getIndices(), readOp.getPermutationMap(), readOp.getPadding(),
- maskingOp.getMask(), readOp.getInBounds());
+ maskingOp.getMask(), inBoundsAttr);
return success();
}
};
@@ -243,11 +254,16 @@ struct MaskedTransferWriteOpPattern
Type resultType =
writeOp.getResult() ? writeOp.getResult().getType() : Type();
+ // See MaskedTransferReadOpPattern: do not preserve in_bounds from the
+ // region body once the mask becomes a transfer operand.
+ auto inBoundsAttr = rewriter.getBoolArrayAttr(
+ SmallVector<bool>(writeOp.getVectorType().getRank(), false));
+
// Replace the `vector.mask` operation.
rewriter.replaceOpWithNewOp<TransferWriteOp>(
maskingOp.getOperation(), resultType, writeOp.getVector(),
writeOp.getBase(), writeOp.getIndices(), writeOp.getPermutationMap(),
- maskingOp.getMask(), writeOp.getInBounds());
+ maskingOp.getMask(), inBoundsAttr);
return success();
}
};
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorMaskElimination.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorMaskElimination.cpp
index 6f75ce7a04511..4194cdfa77b5f 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorMaskElimination.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorMaskElimination.cpp
@@ -52,28 +52,50 @@ LogicalResult resolveAllTrueCreateMaskOp(IRRewriter &rewriter,
}
for (auto [i, dimSize] : unknownDims) {
- // Compute the lower bound for the unknown dimension (i.e. the smallest
- // value it could be).
+ // Compute lower and upper bounds for the unknown dimension. We need both
+ // to agree (same constant or same scalable expression) before treating the
+ // mask as all-true: using only a lower bound is unsound when the value can
+ // vary at runtime (e.g. tensor.dim of a dynamic slice that is full-sized
+ // on most iterations but partial on the last). The lower bound analysis
+ // may then report the full size even though the upper bound analysis (or
+ // the differing tight range) shows the dimension is not a single constant.
FailureOr<ConstantOrScalableBound> dimLowerBound =
vector::ScalableValueBoundsConstraintSet::computeScalableBound(
dimSize, {}, vscaleRange.vscaleMin, vscaleRange.vscaleMax,
presburger::BoundType::LB);
if (failed(dimLowerBound))
return failure();
- auto dimLowerBoundSize = dimLowerBound->getSize();
+ FailureOr<ConstantOrScalableBound::BoundSize> dimLowerBoundSize =
+ dimLowerBound->getSize();
if (failed(dimLowerBoundSize))
return failure();
+
+ FailureOr<ConstantOrScalableBound> dimUpperBound =
+ vector::ScalableValueBoundsConstraintSet::computeScalableBound(
+ dimSize, {}, vscaleRange.vscaleMin, vscaleRange.vscaleMax,
+ presburger::BoundType::UB);
+ if (failed(dimUpperBound))
+ return failure();
+ FailureOr<ConstantOrScalableBound::BoundSize> dimUpperBoundSize =
+ dimUpperBound->getSize();
+ if (failed(dimUpperBoundSize))
+ return failure();
+
+ if (dimLowerBoundSize->scalable != dimUpperBoundSize->scalable ||
+ dimLowerBoundSize->baseSize != dimUpperBoundSize->baseSize)
+ return failure();
+
if (dimLowerBoundSize->scalable) {
- // 1. The lower bound, LB, is scalable. If LB is < the mask dim size then
- // this dim is not all-true.
+ // 1. The bound is scalable. If it is < the mask dim size then this dim
+ // is not all-true.
if (dimLowerBoundSize->baseSize < maskTypeDimSizes[i])
return failure();
} else {
- // 2. The lower bound, LB, is a constant.
+ // 2. The bound is a constant.
// - If the mask dim size is scalable then this dim is not all-true.
if (maskTypeDimScalableFlags[i])
return failure();
- // - If LB < the _fixed-size_ mask dim size then this dim is not all-true.
+ // - If the constant < the _fixed-size_ mask dim size then not all-true.
if (dimLowerBoundSize->baseSize < maskTypeDimSizes[i])
return failure();
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir b/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir
index 61fe3da34e1d5..0669437e9aaa3 100644
--- a/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-compose-masked-vectorize-and-cleanups.mlir
@@ -4,16 +4,16 @@
func.func @masked_matmul(%module: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
// CHECK: %[[MLHS:.*]] = vector.create_mask {{.*}} : vector<8x8xi1>
- // CHECK: %[[LHS:.*]] = vector.transfer_read %{{.*}}, %[[MLHS]] {in_bounds = [true, true]} : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<8x8xf32>
+ // CHECK: %[[LHS:.*]] = vector.transfer_read %{{.*}}, %[[MLHS]] : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<8x8xf32>
// CHECK: %[[MRHS:.*]] = vector.create_mask {{.*}} : vector<8x8xi1>
- // CHECK: %[[RHS:.*]] = vector.transfer_read %{{.*}}, %[[MRHS]] {in_bounds = [true, true]} : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<8x8xf32>
+ // CHECK: %[[RHS:.*]] = vector.transfer_read %{{.*}}, %[[MRHS]] : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<8x8xf32>
// CHECK: %[[MACC:.*]] = vector.create_mask {{.*}} : vector<8x8xi1>
- // CHECK: %[[ACC:.*]] = vector.transfer_read {{.*}}, %[[MACC]] {in_bounds = [true, true]} : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<8x8xf32>
+ // CHECK: %[[ACC:.*]] = vector.transfer_read {{.*}}, %[[MACC]] : memref<?x?xf32, strided<[?, 1], offset: ?>>, vector<8x8xf32>
// CHECK: %[[MRES:.*]] = vector.create_mask {{.*}} : vector<8x8x8xi1>
// CHECK: %[[RES:.*]] = vector.mask %[[MRES]] { vector.contract
// CHECK-SAME: : vector<8x8xf32>, vector<8x8xf32> into vector<8x8xf32>
// CHECK-SAME: : vector<8x8x8xi1> -> vector<8x8xf32>
- // CHECK: vector.transfer_write %[[RES]], %{{.*}}, %[[MACC]] {in_bounds = [true, true]} : vector<8x8xf32>, memref<?x?xf32, strided<[?, 1], offset: ?>>
+ // CHECK: vector.transfer_write %[[RES]], %{{.*}}, %[[MACC]] : vector<8x8xf32>, memref<?x?xf32, strided<[?, 1], offset: ?>>
linalg.matmul ins(%module, %arg1 : memref<?x?xf32>, memref<?x?xf32>) outs(%arg2 : memref<?x?xf32>)
return
}
diff --git a/mlir/test/Dialect/Vector/eliminate-masks.mlir b/mlir/test/Dialect/Vector/eliminate-masks.mlir
index 88be7e529bb9e..fa03e1e90ee7e 100644
--- a/mlir/test/Dialect/Vector/eliminate-masks.mlir
+++ b/mlir/test/Dialect/Vector/eliminate-masks.mlir
@@ -169,3 +169,23 @@ func.func @negative_value_bounds_scalable_dim_not_all_true(%tensor: tensor<2x100
"test.some_use"(%mask) : (vector<3x[4]xi1>) -> ()
return
}
+
+// -----
+
+// tensor.dim of a dynamic extent can match the vector lane count on some paths
+// but not others; LB-only analysis may incorrectly treat create_mask as
+// all-true. Require matching LB and UB before folding to constant_mask.
+//
+// CHECK-LABEL: @negative_dynamic_extent_dim_not_provably_constant
+// CHECK-NOT: vector.constant_mask
+// CHECK: %[[MASK:.*]] = vector.create_mask
+// CHECK: "test.some_use"(%[[MASK]]) : (vector<8x8xi1>) -> ()
+func.func @negative_dynamic_extent_dim_not_provably_constant(%t : tensor<8x?xf32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c8 = arith.constant 8 : index
+ %dim = tensor.dim %t, %c1 : tensor<8x?xf32>
+ %mask = vector.create_mask %c8, %dim : vector<8x8xi1>
+ "test.some_use"(%mask) : (vector<8x8xi1>) -> ()
+ return
+}
diff --git a/mlir/test/Dialect/Vector/lower-vector-mask.mlir b/mlir/test/Dialect/Vector/lower-vector-mask.mlir
index a8a1164e2f762..ddcc961378c3c 100644
--- a/mlir/test/Dialect/Vector/lower-vector-mask.mlir
+++ b/mlir/test/Dialect/Vector/lower-vector-mask.mlir
@@ -75,7 +75,35 @@ func.func @vector_gather(%arg0: tensor<64xf32>, %arg1: tensor<3xf32>) -> tensor<
// CHECK: %[[VAL_5:.*]] = arith.constant 3 : index
// CHECK: %[[VAL_6:.*]] = vector.create_mask %[[VAL_5]] : vector<4xi1>
// CHECK: %[[VAL_7:.*]] = vector.gather %[[VAL_0]][%[[VAL_4]]] [%[[VAL_3]]], %[[VAL_6]], %[[VAL_2]] : tensor<64xf32>, vector<4xindex>, vector<4xi1>, vector<4xf32> into vector<4xf32>
-// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_7]], %[[VAL_1]][%[[VAL_4]]], %[[VAL_6]] {in_bounds = [true]} : vector<4xf32>, tensor<3xf32>
+// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_7]], %[[VAL_1]][%[[VAL_4]]], %[[VAL_6]] : vector<4xf32>, tensor<3xf32>
+
+// -----
+
+// Like linalg vectorization of a reduction tile: `tensor<8x?xf32>` with a
+// `vector.create_mask` over the dynamic minor dim and `vector.mask` around a
+// 2-D `vector.transfer_read` (same shape as the IREE 1600x625 / 8-wide tail repro).
+//
+// CHECK-LABEL: func.func @masked_2d_transfer_read_dynamic_minor_dim
+// CHECK-SAME: %[[TILE:.*]]: tensor<8x?xf32>) -> vector<8x8xf32>
+// CHECK-NOT: vector.mask
+// CHECK: %[[DIM:.*]] = tensor.dim %[[TILE]], %{{.*}} : tensor<8x?xf32>
+// CHECK: %[[MASK:.*]] = vector.create_mask %{{.*}}, %[[DIM]] : vector<8x8xi1>
+// CHECK: %{{.*}} = vector.transfer_read %[[TILE]]{{\[}}%{{.*}}, %{{.*}}], %{{.*}}, %[[MASK]]{{.*}} : tensor<8x?xf32>, vector<8x8xf32>
+// CHECK: return %{{.*}} : vector<8x8xf32>
+// CHECK: }
+func.func @masked_2d_transfer_read_dynamic_minor_dim(%tile: tensor<8x?xf32>) -> vector<8x8xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c8 = arith.constant 8 : index
+ %cst = arith.constant 0.0 : f32
+ %dim1 = tensor.dim %tile, %c1 : tensor<8x?xf32>
+ %mask = vector.create_mask %c8, %dim1 : vector<8x8xi1>
+ %v = vector.mask %mask {
+ vector.transfer_read %tile[%c0, %c0], %cst {in_bounds = [true, true]}
+ : tensor<8x?xf32>, vector<8x8xf32>
+ } : vector<8x8xi1> -> vector<8x8xf32>
+ return %v : vector<8x8xf32>
+}
// -----
diff --git a/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir b/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir
index b5eb6e63f5a8d..7fd592c0f607b 100644
--- a/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir
+++ b/mlir/test/Dialect/Vector/vector-mask-lowering-transforms.mlir
@@ -108,7 +108,7 @@ func.func @transfer_read_3d(
%f0 = arith.constant 0.0 : f32
// CHECK: %[[mask:.*]] = vector.create_mask
// CHECK-NOT: vector.mask
- // CHECK: vector.transfer_read {{.*}}, %[[mask]] {in_bounds = [true, true, true]}
+ // CHECK: vector.transfer_read {{.*}}, %[[mask]]
// CHECK-SAME: : tensor<?x?x?xf32>, vector<2x1x7xf32>
%0 = vector.create_mask %arg0, %arg1, %arg2 : vector<2x1x7xi1>
%1 = vector.mask %0 {
More information about the Mlir-commits
mailing list