[Mlir-commits] [mlir] [MLIR] VectorEmulateNarrowType to support loading of unaligned vectors (PR #113411)
Han-Chung Wang
llvmlistbot at llvm.org
Thu Oct 24 12:57:32 PDT 2024
================
@@ -294,35 +345,65 @@ struct ConvertVectorLoad final : OpConversionPattern<vector::LoadOp> {
// %1 = vector.load %0[%linear_index] : memref<6xi8>, vector<2xi8>
// %2 = vector.bitcast %1 : vector<2xi8> to vector<4xi4>
//
- // TODO: Currently, only the even number of elements loading is supported.
- // To deal with the odd number of elements, one has to extract the
- // subvector at the proper offset after bit-casting.
+ // There are cases where the number of elements to load is not byte-aligned,
+ // for example:
+ //
+ // %1 = vector.load %0[%c1, %c0] : memref<3x3xi2>, vector<3xi2>
+ //
+ // we will have to load extra bytes and extract the exact slice in between.
+ //
+ // %1 = vector.load %0[%c2] : memref<3xi8>, vector<2xi8>
+ // %2 = vector.bitcast %1 : vector<2xi8> to vector<8xi2>
+ // %3 = vector.extract_strided_slice %1 {offsets = [2], sizes = [3], strides
+ // = [1]}
+ // : vector<8xi2> to vector<3xi2>
+ //
+ // TODO: Currently the extract_strided_slice's attributes must be known at
+ // compile time as they must be constants.
auto origElements = op.getVectorType().getNumElements();
- if (origElements % scale != 0)
- return failure();
+ bool isUnalignedEmulation = origElements % scale != 0;
auto stridedMetadata =
rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getBase());
OpFoldResult linearizedIndices;
- std::tie(std::ignore, linearizedIndices) =
+ memref::LinearizedMemRefInfo linearizedInfo;
+ std::tie(linearizedInfo, linearizedIndices) =
memref::getLinearizedMemRefOffsetAndSize(
rewriter, loc, srcBits, dstBits,
stridedMetadata.getConstifiedMixedOffset(),
stridedMetadata.getConstifiedMixedSizes(),
stridedMetadata.getConstifiedMixedStrides(),
getAsOpFoldResult(adaptor.getIndices()));
- auto numElements = (origElements + scale - 1) / scale;
+ auto foldedFrontPaddingSize = getFrontPaddingSize(
+ rewriter, loc, linearizedInfo, isUnalignedEmulation);
+
+ if (!foldedFrontPaddingSize) {
+ // unimplemented case for dynamic front padding size
+ return failure();
+ }
+
+ auto numElements =
+ llvm::alignTo(*foldedFrontPaddingSize + origElements, scale) / scale;
----------------
hanhanW wrote:
Sorry I made a bad suggestion because I misread it. I think we can use `llvm::divideCeil(*foldedFrontPaddingSize + origElements, scale)`. please also update other codes in the patch.
https://github.com/llvm/llvm-project/pull/113411
More information about the Mlir-commits
mailing list