[Mlir-commits] [mlir] [MLIR][Vector] Fix a narrow byte emulation alignment issue (PR #137970)
Alan Li
llvmlistbot at llvm.org
Wed Apr 30 07:25:51 PDT 2025
https://github.com/lialan created https://github.com/llvm/llvm-project/pull/137970
This is a follow-up to https://github.com/llvm/llvm-project/pull/133231 and fixes an alignment issue when the front byte is only partially stored. Specifically, this patch:
* removes one unnecessary guard (the bailout for dynamic front padding sizes);
* fixes an issue where the destination index was incremented even when the front byte was not partially stored; the increment is only needed after a partial front-byte store (see the sketch below).
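To illustrate the fixed logic, here is a minimal standalone C++ sketch (not the MLIR pattern itself) of the front-byte index arithmetic. The variable names mirror those in VectorEmulateNarrowType.cpp; the values in main() are hypothetical.

    #include <cstdint>
    #include <iostream>

    // How many sub-byte elements still need an RMW store into the
    // partially occupied front byte, and whether the destination index
    // must then be bumped past that byte.
    struct FrontByteInfo {
      int64_t frontSubWidthStoreElem;
      bool incrementDestIndex;
    };

    FrontByteInfo analyzeFrontByte(int64_t emulatedPerContainerElem,
                                   int64_t foldedNumFrontPadElems) {
      // Same formula as in ConvertVectorStore.
      int64_t frontElems =
          (emulatedPerContainerElem - foldedNumFrontPadElems) %
          emulatedPerContainerElem;
      // The fix: only advance to the next byte when the front byte was
      // actually partially written.
      return {frontElems, /*incrementDestIndex=*/frontElems > 0};
    }

    int main() {
      // i2 elements in an i8 container: 4 emulated elements per byte.
      // With no front padding the store is already byte-aligned, so the
      // destination index must NOT be incremented.
      auto aligned = analyzeFrontByte(/*emulatedPerContainerElem=*/4,
                                      /*foldedNumFrontPadElems=*/0);
      std::cout << aligned.frontSubWidthStoreElem << " "
                << aligned.incrementDestIndex << "\n"; // prints: 0 0

      // One element of front padding: the first byte is partially
      // stored, so we advance past it afterwards.
      auto padded = analyzeFrontByte(4, 1);
      std::cout << padded.frontSubWidthStoreElem << " "
                << padded.incrementDestIndex << "\n"; // prints: 3 1
    }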
From 759e87a3d4472b98e93d66027717ec969bcae937 Mon Sep 17 00:00:00 2001
From: Alan Li <me at alanli.org>
Date: Wed, 30 Apr 2025 14:11:19 +0000
Subject: [PATCH] [MLIR][Vector] Fix a narrow byte emulation alignment issue
---
.../Transforms/VectorEmulateNarrowType.cpp | 21 +++++++++----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
index a560aa1b1e680..0f6c9ea267763 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
@@ -621,12 +621,6 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
? 0
: getConstantIntValue(linearizedInfo.intraDataOffset);
- if (!foldedNumFrontPadElems) {
- return rewriter.notifyMatchFailure(
- op, "subbyte store emulation: dynamic front padding size is "
- "not yet implemented");
- }
-
auto memrefBase = cast<MemRefValue>(adaptor.getBase());
// RMWs are not needed when:
@@ -722,6 +716,8 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
auto frontSubWidthStoreElem =
(emulatedPerContainerElem - *foldedNumFrontPadElems) %
emulatedPerContainerElem;
+
+ bool partiallyStoredFrontByte = false;
if (frontSubWidthStoreElem > 0) {
SmallVector<bool> frontMaskValues(emulatedPerContainerElem, false);
if (*foldedNumFrontPadElems + origElements < emulatedPerContainerElem) {
@@ -742,6 +738,7 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
storeFunc(rewriter, loc, memrefBase, currentDestIndex,
cast<VectorValue>(value), frontMask.getResult());
+ partiallyStoredFrontByte = true;
}
if (currentSourceIndex >= origElements) {
@@ -749,11 +746,13 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
return success();
}
- // Increment the destination index by 1 to align to the emulated width
- // boundary.
- auto constantOne = rewriter.create<arith::ConstantIndexOp>(loc, 1);
- currentDestIndex = rewriter.create<arith::AddIOp>(
- loc, rewriter.getIndexType(), currentDestIndex, constantOne);
+ if (partiallyStoredFrontByte) {
+ // Increment the destination index by 1 to align to the emulated width
+ // boundary, if the front byte was partially stored.
+ auto constantOne = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+ currentDestIndex = rewriter.create<arith::AddIOp>(
+ loc, rewriter.getIndexType(), currentDestIndex, constantOne);
+ }
// 2. Full width store for the inner output bytes.
// After the previous step, the store address is aligned to the emulated
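To make the conditional increment concrete with hypothetical numbers: storing i2 elements (emulatedPerContainerElem = 4) at a byte-aligned offset gives foldedNumFrontPadElems = 0, so frontSubWidthStoreElem = (4 - 0) % 4 = 0. No partial front store is emitted, partiallyStoredFrontByte stays false, and the destination index stays put. Before this patch, the unconditional increment would have skipped past the first destination byte in exactly this aligned case.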