[Mlir-commits] [mlir] [mlir][vector] Add FoldTransferReadOfEmptyTensor. (PR #196599)
Erick Ochoa Lopez
llvmlistbot at llvm.org
Wed May 13 06:30:43 PDT 2026
https://github.com/amd-eochoalo updated https://github.com/llvm/llvm-project/pull/196599
>From 39ad119dc3ffb11b7c123e5b74658ef8e16d8721 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Fri, 8 May 2026 14:13:56 -0400
Subject: [PATCH 1/5] [mlir][vector] Add FoldTransferReadOfEmptyTensor.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A vector.transfer_read from a tensor.empty can be folded as follows:
Case 1 — fully in-bounds, no mask:
%e = tensor.empty() : tensor<128xf16>
%r = vector.transfer_read %e[%c0], %pad {in_bounds = [true]}
->
%r = ub.poison : vector<128xf16>
Case 2 — fully in-bounds, masked:
%e = tensor.empty() : tensor<128xf16>
%r = vector.transfer_read %e[%c0], %pad, %mask {in_bounds = [true]}
->
%poison = ub.poison : vector<128xf16>
%bcast = vector.broadcast %pad : f16 to vector<128xf16>
%r = arith.select %mask, %poison, %bcast
Case 3 — not fully in-bounds, no mask:
%e = tensor.empty() : tensor<100xf16>
%r = vector.transfer_read %e[%c0], %pad : tensor<100xf16>, vector<128xf16>
->
%r = vector.broadcast %pad : f16 to vector<128xf16>
Case 4 — not fully in-bounds, masked:
%e = tensor.empty() : tensor<100xf16>
%r = vector.transfer_read %e[%c0], %pad, %mask : tensor<100xf16>, vector<128xf16>
->
%r = vector.broadcast %pad : f16 to vector<128xf16>
---
mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 74 +++++++++++-
mlir/test/Dialect/Vector/canonicalize.mlir | 128 +++++++++++++++++++++
2 files changed, 201 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 51be1e4431e70..f51a3b50f5101 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -5529,6 +5529,77 @@ static AffineMap inverseWithUnusedDims(AffineMap map) {
}
namespace {
+
+/// Folds transfer_read(tensor.empty).
+///
+/// Since tensor.empty has unspecified contents, reading from it produces
+/// an unspecified value, which is exactly the semantics of ub.poison.
+///
+/// Case 1 — fully in-bounds, no mask:
+/// %e = tensor.empty() : tensor<128xf16>
+/// %r = vector.transfer_read %e[%c0], %pad {in_bounds = [true]}
+/// ->
+/// %r = ub.poison : vector<128xf16>
+///
+/// Case 2 — fully in-bounds, masked:
+/// %e = tensor.empty() : tensor<128xf16>
+/// %r = vector.transfer_read %e[%c0], %pad, %mask {in_bounds = [true]}
+/// ->
+/// %poison = ub.poison : vector<128xf16>
+/// %bcast = vector.broadcast %pad : f16 to vector<128xf16>
+/// %r = arith.select %mask, %poison, %bcast
+///
+/// Case 3 — not fully in-bounds (with or without mask):
+/// Out-of-bounds lanes produce pad, in-bounds lanes read unspecified
+/// contents from tensor.empty, so we may choose pad for all lanes.
+/// ->
+/// %r = vector.broadcast %pad : f16 to vector<128xf16>
+struct FoldTransferReadOfEmptyTensor : public OpRewritePattern<TransferReadOp> {
+ using Base::Base;
+
+ LogicalResult matchAndRewrite(TransferReadOp op,
+ PatternRewriter &rewriter) const override {
+ if (!op.hasPureTensorSemantics())
+ return failure();
+
+ if (!op.getBase().getDefiningOp<tensor::EmptyOp>())
+ return failure();
+
+ if (!op.getPermutationMap().isMinorIdentity())
+ return failure();
+
+ bool fullyInBounds =
+ llvm::all_of(op.getInBoundsValues(), [](bool v) { return v; });
+ TypedValue<VectorType> mask = op.getMask();
+
+ if (mask && fullyInBounds) {
+ Value rPad = op.getPadding();
+ assert(!isa<VectorType>(rPad.getType()) &&
+ "masked transfers on vector element types are not supported; "
+ "see verifyTransferOp");
+ Value poison = ub::PoisonOp::create(rewriter, op.getLoc(), op.getType());
+ Value padVal = vector::BroadcastOp::create(rewriter, rPad.getLoc(),
+ op.getType(), rPad);
+ rewriter.replaceOpWithNewOp<arith::SelectOp>(op, mask, poison, padVal);
+ return success();
+ }
+
+ if (!mask && fullyInBounds) {
+ rewriter.replaceOp(
+ op, ub::PoisonOp::create(rewriter, op.getLoc(), op.getType()));
+ return success();
+ }
+
+ // Not fully in-bounds (with or without mask): out-of-bounds lanes
+ // produce pad, and in-bounds lanes read unspecified contents from
+ // tensor.empty, so we may choose pad for those too.
+ Value rPad = op.getPadding();
+ rewriter.replaceOp(op, vector::BroadcastOp::create(rewriter, rPad.getLoc(),
+ op.getType(), rPad));
+ return success();
+ }
+};
+
/// Store to load forwarding for transfer operations with permuation maps.
/// Even if the permutation maps are different we can still propagate the store
/// into the load if the size of the dimensions read and written match. Then we
@@ -5629,7 +5700,8 @@ struct TransferReadAfterWriteToBroadcast
void TransferReadOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
- results.add<TransferReadAfterWriteToBroadcast>(context);
+ results.add<FoldTransferReadOfEmptyTensor, TransferReadAfterWriteToBroadcast>(
+ context);
}
FailureOr<std::optional<SmallVector<Value>>>
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 6aa92ab79a0dd..dd51ebd4d825c 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -4407,3 +4407,131 @@ func.func @no_fold_alltrue_mask_empty_body_scalar_result(
%result = vector.mask %all_true, %passthru { vector.yield %val : i32 } : vector<1xi1> -> i32
return %result : i32
}
+
+// -----
+
+// transfer_read from a memref (not tensor semantics): pattern must not fire.
+// CHECK-LABEL: func.func @negative_read_empty_not_tensor_semantics
+// CHECK: vector.transfer_read
+func.func @negative_read_empty_not_tensor_semantics(%m: memref<128xf16>) -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %r = vector.transfer_read %m[%c0], %cst {in_bounds = [true]}
+ : memref<128xf16>, vector<128xf16>
+ return %r : vector<128xf16>
+}
+
+// -----
+
+// transfer_read from a regular tensor (not tensor.empty): pattern must not fire.
+// CHECK-LABEL: func.func @negative_read_not_empty_tensor
+// CHECK: vector.transfer_read
+func.func @negative_read_not_empty_tensor(%t: tensor<128xf16>) -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %r = vector.transfer_read %t[%c0], %cst {in_bounds = [true]}
+ : tensor<128xf16>, vector<128xf16>
+ return %r : vector<128xf16>
+}
+
+// -----
+
+// transfer_read from tensor.empty with a transposing permutation map: bail.
+// CHECK-LABEL: func.func @negative_read_empty_non_identity_map
+// CHECK: tensor.empty
+// CHECK: vector.transfer_read
+func.func @negative_read_empty_non_identity_map() -> vector<64x128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %e = tensor.empty() : tensor<128x64xf16>
+ %r = vector.transfer_read %e[%c0, %c0], %cst
+ {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>}
+ : tensor<128x64xf16>, vector<64x128xf16>
+ return %r : vector<64x128xf16>
+}
+
+// -----
+
+// Unmasked, in-bounds read from tensor.empty -> ub.poison.
+// CHECK-LABEL: func.func @fold_read_empty_unmasked_inbounds
+// CHECK-NOT: tensor.empty
+// CHECK-NOT: vector.transfer_read
+// CHECK: %[[POISON:.*]] = ub.poison : vector<128xf16>
+// CHECK: return %[[POISON]]
+func.func @fold_read_empty_unmasked_inbounds() -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %e = tensor.empty() : tensor<128xf16>
+ %r = vector.transfer_read %e[%c0], %cst {in_bounds = [true]}
+ : tensor<128xf16>, vector<128xf16>
+ return %r : vector<128xf16>
+}
+
+// -----
+
+// Unmasked, out-of-bounds read from tensor.empty -> broadcast(pad).
+// CHECK-LABEL: func.func @fold_read_empty_unmasked_outofbounds
+// CHECK-NOT: tensor.empty
+// CHECK-NOT: vector.transfer_read
+// CHECK: %[[PAD:.+]] = arith.constant dense<0.000000e+00> : vector<256xf16>
+// CHECK: return %[[PAD]]
+func.func @fold_read_empty_unmasked_outofbounds() -> vector<256xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %e = tensor.empty() : tensor<128xf16>
+ %r = vector.transfer_read %e[%c0], %cst
+ : tensor<128xf16>, vector<256xf16>
+ return %r : vector<256xf16>
+}
+
+// -----
+
+// Masked, in-bounds read from tensor.empty with a concrete pad value ->
+// select(mask, poison, broadcast(pad)).
+// The canonicalizer also folds select(mask, poison, x) -> x.
+// CHECK-LABEL: func.func @fold_read_empty_masked_real_pad
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<128xf16>
+// CHECK: return %[[CST]]
+func.func @fold_read_empty_masked_real_pad(%mask: vector<128xi1>) -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %e = tensor.empty() : tensor<128xf16>
+ %r = vector.transfer_read %e[%c0], %cst, %mask {in_bounds = [true]}
+ : tensor<128xf16>, vector<128xf16>
+ return %r : vector<128xf16>
+}
+
+// -----
+
+// Masked read from tensor.empty where padding is ub.poison -> just ub.poison.
+// CHECK-LABEL: func.func @fold_read_empty_masked_poison_pad
+// CHECK-NOT: tensor.empty
+// CHECK-NOT: vector.transfer_read
+// CHECK-NOT: arith.select
+// CHECK: %[[POISON:.*]] = ub.poison : vector<128xf16>
+// CHECK: return %[[POISON]]
+func.func @fold_read_empty_masked_poison_pad(%mask: vector<128xi1>) -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %pad = ub.poison : f16
+ %e = tensor.empty() : tensor<128xf16>
+ %r = vector.transfer_read %e[%c0], %pad, %mask {in_bounds = [true]}
+ : tensor<128xf16>, vector<128xf16>
+ return %r : vector<128xf16>
+}
+
+// -----
+
+// Unmasked read from a dynamically-shaped tensor.empty -> ub.poison.
+// CHECK-LABEL: func.func @fold_read_empty_dynamic_unmasked
+// CHECK-NOT: tensor.empty
+// CHECK-NOT: vector.transfer_read
+// CHECK: %[[POISON:.*]] = ub.poison : vector<128xf16>
+// CHECK: return %[[POISON]]
+func.func @fold_read_empty_dynamic_unmasked(%sz: index) -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %e = tensor.empty(%sz) : tensor<?xf16>
+ %r = vector.transfer_read %e[%c0], %cst {in_bounds = [true]}
+ : tensor<?xf16>, vector<128xf16>
+ return %r : vector<128xf16>
+}
>From a7ed600ac3440fb38efc14bfef8c958ca178c83e Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 11 May 2026 10:52:11 -0400
Subject: [PATCH 2/5] fold to poison
---
mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 35 +++++-----------------
mlir/test/Dialect/Vector/canonicalize.mlir | 7 +++--
2 files changed, 11 insertions(+), 31 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index f51a3b50f5101..50bbce0bc9535 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -5541,17 +5541,12 @@ namespace {
/// ->
/// %r = ub.poison : vector<128xf16>
///
-/// Case 2 — fully in-bounds, masked:
-/// %e = tensor.empty() : tensor<128xf16>
-/// %r = vector.transfer_read %e[%c0], %pad, %mask {in_bounds = [true]}
-/// ->
-/// %poison = ub.poison : vector<128xf16>
-/// %bcast = vector.broadcast %pad : f16 to vector<128xf16>
-/// %r = arith.select %mask, %poison, %bcast
-///
-/// Case 3 — not fully in-bounds (with or without mask):
-/// Out-of-bounds lanes produce pad, in-bounds lanes read unspecified
-/// contents from tensor.empty, so we may choose pad for all lanes.
+/// Case 2 — has mask, or not fully in-bounds (or both):
+/// In-bounds lanes read unspecified (poison-refinable) contents from
+/// tensor.empty, so we may replace them with pad. Out-of-bounds lanes
+/// already produce pad. For masked in-bounds reads, a
+/// select(mask, poison, pad) is also equivalent to pad because poison
+/// can be refined to any value.
/// ->
/// %r = vector.broadcast %pad : f16 to vector<128xf16>
struct FoldTransferReadOfEmptyTensor : public OpRewritePattern<TransferReadOp> {
@@ -5570,29 +5565,13 @@ struct FoldTransferReadOfEmptyTensor : public OpRewritePattern<TransferReadOp> {
bool fullyInBounds =
llvm::all_of(op.getInBoundsValues(), [](bool v) { return v; });
- TypedValue<VectorType> mask = op.getMask();
-
- if (mask && fullyInBounds) {
- Value rPad = op.getPadding();
- assert(!isa<VectorType>(rPad.getType()) &&
- "masked transfers on vector element types are not supported; "
- "see verifyTransferOp");
- Value poison = ub::PoisonOp::create(rewriter, op.getLoc(), op.getType());
- Value padVal = vector::BroadcastOp::create(rewriter, rPad.getLoc(),
- op.getType(), rPad);
- rewriter.replaceOpWithNewOp<arith::SelectOp>(op, mask, poison, padVal);
- return success();
- }
- if (!mask && fullyInBounds) {
+ if (!op.getMask() && fullyInBounds) {
rewriter.replaceOp(
op, ub::PoisonOp::create(rewriter, op.getLoc(), op.getType()));
return success();
}
- // Not fully in-bounds (with or without mask): out-of-bounds lanes
- // produce pad, and in-bounds lanes read unspecified contents from
- // tensor.empty, so we may choose pad for those too.
Value rPad = op.getPadding();
rewriter.replaceOp(op, vector::BroadcastOp::create(rewriter, rPad.getLoc(),
op.getType(), rPad));
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index dd51ebd4d825c..f3b7a6f013a43 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -4487,8 +4487,8 @@ func.func @fold_read_empty_unmasked_outofbounds() -> vector<256xf16> {
// -----
// Masked, in-bounds read from tensor.empty with a concrete pad value ->
-// select(mask, poison, broadcast(pad)).
-// The canonicalizer also folds select(mask, poison, x) -> x.
+// broadcast(pad). (select(mask, poison, x) == x, so we emit broadcast
+// directly without going through select.)
// CHECK-LABEL: func.func @fold_read_empty_masked_real_pad
// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<128xf16>
// CHECK: return %[[CST]]
@@ -4503,7 +4503,8 @@ func.func @fold_read_empty_masked_real_pad(%mask: vector<128xi1>) -> vector<128x
// -----
-// Masked read from tensor.empty where padding is ub.poison -> just ub.poison.
+// Masked read from tensor.empty where padding is ub.poison ->
+// broadcast(poison_scalar) which folds to ub.poison vector.
// CHECK-LABEL: func.func @fold_read_empty_masked_poison_pad
// CHECK-NOT: tensor.empty
// CHECK-NOT: vector.transfer_read
>From aacdde30498f473b4fe8c0933d4a383390ea4e4f Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Tue, 12 May 2026 09:23:41 -0400
Subject: [PATCH 3/5] Bailed if masked
---
mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 3 +++
mlir/test/Dialect/Vector/canonicalize.mlir | 14 ++++++++++++++
2 files changed, 17 insertions(+)
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 50bbce0bc9535..310ba3032db8a 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -5563,6 +5563,9 @@ struct FoldTransferReadOfEmptyTensor : public OpRewritePattern<TransferReadOp> {
if (!op.getPermutationMap().isMinorIdentity())
return failure();
+ if (op.isMasked())
+ return failure();
+
bool fullyInBounds =
llvm::all_of(op.getInBoundsValues(), [](bool v) { return v; });
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index f3b7a6f013a43..827dbbc902191 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -4436,6 +4436,20 @@ func.func @negative_read_not_empty_tensor(%t: tensor<128xf16>) -> vector<128xf16
// -----
+// transfer_read from tensor.empty wrapped in vector.mask: pattern must not fire.
+// CHECK-LABEL: func.func @negative_read_empty_vector_mask
+// CHECK: tensor.empty
+// CHECK: vector.mask
+func.func @negative_read_empty_vector_mask(%mask: vector<128xi1>) -> vector<128xf16> {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.0 : f16
+ %e = tensor.empty() : tensor<128xf16>
+ %r = vector.mask %mask { vector.transfer_read %e[%c0], %cst {in_bounds = [true]} : tensor<128xf16>, vector<128xf16> } : vector<128xi1> -> vector<128xf16>
+ return %r : vector<128xf16>
+}
+
+// -----
+
// transfer_read from tensor.empty with a transposing permutation map: bail.
// CHECK-LABEL: func.func @negative_read_empty_non_identity_map
// CHECK: tensor.empty
>From 6d59bb7ed1a3896015782fd32cc6b46a4f7d7cae Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Wed, 13 May 2026 09:23:07 -0400
Subject: [PATCH 4/5] Use fully in-bounds and MaskableOpInterface
---
mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 49 +++++++++++-----------
mlir/test/Dialect/Vector/canonicalize.mlir | 25 +++++------
2 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 310ba3032db8a..2dee914a5f082 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -25,6 +25,7 @@
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Utils/VerificationUtils.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
@@ -5530,7 +5531,7 @@ static AffineMap inverseWithUnusedDims(AffineMap map) {
namespace {
-/// Folds transfer_read(tensor.empty).
+/// Folds transfer_read(tensor.empty) when fully in-bounds.
///
/// Since tensor.empty has unspecified contents, reading from it produces
/// an unspecified value, which is exactly the semantics of ub.poison.
@@ -5541,19 +5542,23 @@ namespace {
/// ->
/// %r = ub.poison : vector<128xf16>
///
-/// Case 2 — has mask, or not fully in-bounds (or both):
-/// In-bounds lanes read unspecified (poison-refinable) contents from
-/// tensor.empty, so we may replace them with pad. Out-of-bounds lanes
-/// already produce pad. For masked in-bounds reads, a
-/// select(mask, poison, pad) is also equivalent to pad because poison
-/// can be refined to any value.
+/// Case 2 — fully in-bounds, masked:
+/// %e = tensor.empty() : tensor<128xf16>
+/// %r = vector.transfer_read %e[%c0], %pad, %mask {in_bounds = [true]}
/// ->
/// %r = vector.broadcast %pad : f16 to vector<128xf16>
-struct FoldTransferReadOfEmptyTensor : public OpRewritePattern<TransferReadOp> {
- using Base::Base;
-
- LogicalResult matchAndRewrite(TransferReadOp op,
- PatternRewriter &rewriter) const override {
+///
+/// In-bounds lanes read unspecified (poison-refinable) contents from
+/// tensor.empty, so we may replace them with pad. For masked in-bounds
+/// reads, select(mask, poison, pad) can likewise be refined to pad,
+/// because poison may be refined to any value.
+struct FoldTransferReadOfEmptyTensor
+ : public vector::MaskableOpRewritePattern<TransferReadOp> {
+ using MaskableOpRewritePattern::MaskableOpRewritePattern;
+
+ FailureOr<Value>
+ matchAndRewriteMaskableOp(TransferReadOp op, MaskingOpInterface maskingOp,
+ PatternRewriter &rewriter) const override {
if (!op.hasPureTensorSemantics())
return failure();
@@ -5563,22 +5568,18 @@ struct FoldTransferReadOfEmptyTensor : public OpRewritePattern<TransferReadOp> {
if (!op.getPermutationMap().isMinorIdentity())
return failure();
- if (op.isMasked())
+ if (op.hasOutOfBoundsDim())
return failure();
- bool fullyInBounds =
- llvm::all_of(op.getInBoundsValues(), [](bool v) { return v; });
-
- if (!op.getMask() && fullyInBounds) {
- rewriter.replaceOp(
- op, ub::PoisonOp::create(rewriter, op.getLoc(), op.getType()));
- return success();
+ if (!maskingOp && !op.getMask()) {
+ return ub::PoisonOp::create(rewriter, op.getLoc(), op.getType())
+ ->getResult(0);
}
- Value rPad = op.getPadding();
- rewriter.replaceOp(op, vector::BroadcastOp::create(rewriter, rPad.getLoc(),
- op.getType(), rPad));
- return success();
+ Value pad = op.getPadding();
+ return vector::BroadcastOp::create(rewriter, pad.getLoc(), op.getType(),
+ pad)
+ ->getResult(0);
}
};
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 827dbbc902191..a5be8ad4a8c79 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -4436,11 +4436,14 @@ func.func @negative_read_not_empty_tensor(%t: tensor<128xf16>) -> vector<128xf16
// -----
-// transfer_read from tensor.empty wrapped in vector.mask: pattern must not fire.
-// CHECK-LABEL: func.func @negative_read_empty_vector_mask
-// CHECK: tensor.empty
-// CHECK: vector.mask
-func.func @negative_read_empty_vector_mask(%mask: vector<128xi1>) -> vector<128xf16> {
+// transfer_read from tensor.empty wrapped in vector.mask -> broadcast(pad).
+// CHECK-LABEL: func.func @fold_read_empty_vector_mask
+// CHECK-NOT: tensor.empty
+// CHECK-NOT: vector.transfer_read
+// CHECK-NOT: vector.mask
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<128xf16>
+// CHECK: return %[[CST]]
+func.func @fold_read_empty_vector_mask(%mask: vector<128xi1>) -> vector<128xf16> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f16
%e = tensor.empty() : tensor<128xf16>
@@ -4483,13 +4486,11 @@ func.func @fold_read_empty_unmasked_inbounds() -> vector<128xf16> {
// -----
-// Unmasked, out-of-bounds read from tensor.empty -> broadcast(pad).
-// CHECK-LABEL: func.func @fold_read_empty_unmasked_outofbounds
-// CHECK-NOT: tensor.empty
-// CHECK-NOT: vector.transfer_read
-// CHECK: %[[PAD:.+]] = arith.constant dense<0.000000e+00> : vector<256xf16>
-// CHECK: return %[[PAD]]
-func.func @fold_read_empty_unmasked_outofbounds() -> vector<256xf16> {
+// Out-of-bounds read from tensor.empty: pattern must not fire.
+// CHECK-LABEL: func.func @negative_read_empty_outofbounds
+// CHECK: tensor.empty
+// CHECK: vector.transfer_read
+func.func @negative_read_empty_outofbounds() -> vector<256xf16> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f16
%e = tensor.empty() : tensor<128xf16>
>From 2df8ffca95f386676c6dbbc04dca125c1f91984b Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Wed, 13 May 2026 09:28:57 -0400
Subject: [PATCH 5/5] Update tests
---
mlir/test/Dialect/Vector/canonicalize.mlir | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index a5be8ad4a8c79..e45194386bfd7 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -4436,14 +4436,14 @@ func.func @negative_read_not_empty_tensor(%t: tensor<128xf16>) -> vector<128xf16
// -----
-// transfer_read from tensor.empty wrapped in vector.mask -> broadcast(pad).
-// CHECK-LABEL: func.func @fold_read_empty_vector_mask
+// Masked transfer_read from tensor.empty -> broadcast(pad).
+// CHECK-LABEL: func.func @fold_read_empty_masked
// CHECK-NOT: tensor.empty
// CHECK-NOT: vector.transfer_read
// CHECK-NOT: vector.mask
// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<128xf16>
// CHECK: return %[[CST]]
-func.func @fold_read_empty_vector_mask(%mask: vector<128xi1>) -> vector<128xf16> {
+func.func @fold_read_empty_masked(%mask: vector<128xi1>) -> vector<128xf16> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f16
%e = tensor.empty() : tensor<128xf16>
@@ -4501,13 +4501,13 @@ func.func @negative_read_empty_outofbounds() -> vector<256xf16> {
// -----
-// Masked, in-bounds read from tensor.empty with a concrete pad value ->
+// In-bounds read with mask from tensor.empty with a concrete pad value ->
// broadcast(pad). (select(mask, poison, x) == x, so we emit broadcast
// directly without going through select.)
-// CHECK-LABEL: func.func @fold_read_empty_masked_real_pad
+// CHECK-LABEL: func.func @fold_read_empty_with_mask_real_pad
// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<128xf16>
// CHECK: return %[[CST]]
-func.func @fold_read_empty_masked_real_pad(%mask: vector<128xi1>) -> vector<128xf16> {
+func.func @fold_read_empty_with_mask_real_pad(%mask: vector<128xi1>) -> vector<128xf16> {
%c0 = arith.constant 0 : index
%cst = arith.constant 0.0 : f16
%e = tensor.empty() : tensor<128xf16>
@@ -4518,15 +4518,15 @@ func.func @fold_read_empty_masked_real_pad(%mask: vector<128xi1>) -> vector<128x
// -----
-// Masked read from tensor.empty where padding is ub.poison ->
+// Read with mask from tensor.empty where padding is ub.poison ->
// broadcast(poison_scalar) which folds to ub.poison vector.
-// CHECK-LABEL: func.func @fold_read_empty_masked_poison_pad
+// CHECK-LABEL: func.func @fold_read_empty_with_mask_poison_pad
// CHECK-NOT: tensor.empty
// CHECK-NOT: vector.transfer_read
// CHECK-NOT: arith.select
// CHECK: %[[POISON:.*]] = ub.poison : vector<128xf16>
// CHECK: return %[[POISON]]
-func.func @fold_read_empty_masked_poison_pad(%mask: vector<128xi1>) -> vector<128xf16> {
+func.func @fold_read_empty_with_mask_poison_pad(%mask: vector<128xi1>) -> vector<128xf16> {
%c0 = arith.constant 0 : index
%pad = ub.poison : f16
%e = tensor.empty() : tensor<128xf16>
More information about the Mlir-commits
mailing list