[Mlir-commits] [mlir] [mlir][vector] Add support for linearizing Extract, ExtractStridedSlice, Shuffle VectorOps in VectorLinearize (PR #88204)
Charitha Saumya
llvmlistbot at llvm.org
Tue Apr 16 12:07:57 PDT 2024
https://github.com/charithaintc updated https://github.com/llvm/llvm-project/pull/88204
>From dc63b10f878bf2609bd04cc7668b238939969282 Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 9 Apr 2024 14:04:04 -0700
Subject: [PATCH 1/8] add linearize patterns for Extract, ExtractStridedSlice,
Shuffle VectorOps
---
.../Vector/Transforms/VectorLinearize.cpp | 249 +++++++++++++++++-
mlir/test/Dialect/Vector/linearize.mlir | 80 ++++++
2 files changed, 328 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index b59e9062e5a08e..257c940e5ed93c 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -15,7 +15,9 @@
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/DialectConversion.h"
+#include <numeric>
using namespace mlir;
@@ -103,6 +105,234 @@ struct LinearizeVectorizable final
return success();
}
+private:
+ unsigned targetVectorBitWidth;
+};
+
+struct LinearizeVectorExtractStridedSlice final
+ : public mlir::OpConversionPattern<mlir::vector::ExtractStridedSliceOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LinearizeVectorExtractStridedSlice(
+ const TypeConverter &typeConverter, MLIRContext *context,
+ unsigned targetVectBitWidth = std::numeric_limits<unsigned>::max(),
+ PatternBenefit benefit = 1)
+ : OpConversionPattern(typeConverter, context, benefit),
+ targetVectorBitWidth(targetVectBitWidth) {}
+
+ LogicalResult
+ matchAndRewrite(vector::ExtractStridedSliceOp extractOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto dstType = getTypeConverter()->convertType(extractOp.getType());
+ auto loc = extractOp.getLoc();
+ if (!dstType)
+ return rewriter.notifyMatchFailure(loc, "cannot convert type.");
+ if (extractOp.getVector().getType().isScalable() ||
+ dstType.cast<VectorType>().isScalable())
+ return rewriter.notifyMatchFailure(loc,
+ "scalable vectors are not supported.");
+ if (!isLessThanTargetBitWidth(extractOp, targetVectorBitWidth))
+ return rewriter.notifyMatchFailure(
+ extractOp, "Can't flatten since targetBitWidth <= OpSize");
+
+ auto offsets = extractOp.getOffsets().getValue();
+ auto sizes = extractOp.getSizes().getValue();
+ auto strides = extractOp.getStrides().getValue();
+
+ if (!isConstantIntValue(strides[0], 1))
+ return rewriter.notifyMatchFailure(
+ extractOp, "Strided slice with stride != 1 is not supported.");
+
+ Value srcVector = adaptor.getVector();
+
+ // if kD offsets are specified for nd source vector (n > k), the granularity
+ // of the extraction is greater than 1. In this case last (n-k) dimensions
+ // form the extraction granularity. example : %0 =
+ // vector.extract_strided_slice %src { offsets = [0, 0], sizes = [2, 2],
+ // strides = [1, 1]} : vector<4x8x8xf32> to vector<2x2x8xf32>
+ // here, extraction granularity is 8.
+ int64_t extractSliceLen = 1;
+ auto n = extractOp.getSourceVectorType().getRank();
+ auto k = (int64_t)offsets.size();
+ if (n > k) {
+ for (unsigned i = 0; i < n - k; i++) {
+ extractSliceLen *= extractOp.getSourceVectorType().getShape()[i + k];
+ }
+ }
+
+ // get total number of extracted slices
+ int64_t nExtractedSlices = 1;
+ for (auto size : sizes) {
+ nExtractedSlices *= size.cast<IntegerAttr>().getInt();
+ }
+
+ // compute the strides of the source vector considering first k dimensions
+ llvm::SmallVector<int64_t, 4> sourceStrides(k, extractSliceLen);
+ for (int i = k - 2; i >= 0; --i) {
+ sourceStrides[i] = sourceStrides[i + 1] *
+ extractOp.getSourceVectorType().getShape()[i + 1];
+ }
+ // final shuffle indices has nExtractedElems * extractSliceLen elements
+ llvm::SmallVector<int64_t, 4> indices(nExtractedSlices * extractSliceLen);
+ // compute the strides of the extracted kD vector
+ llvm::SmallVector<int64_t, 4> extractedStrides(k, 1);
+ // compute extractedStrides
+ for (int i = k - 2; i >= 0; --i) {
+ extractedStrides[i] =
+ extractedStrides[i + 1] * sizes[i + 1].cast<IntegerAttr>().getInt();
+ }
+ // iterate over all extracted slices from 0 to nExtractedElems-1
+ // and compute the multi-dimensional index and the corresponding linearized
+ // index within the source vector
+ for (int64_t i = 0; i < nExtractedSlices; ++i) {
+ int64_t index = i;
+ // compute the corresponding multi-dimensional index
+ llvm::SmallVector<int64_t, 4> multiDimIndex(k, 0);
+ for (int64_t j = 0; j < k; ++j) {
+ multiDimIndex[j] = (index / extractedStrides[j]);
+ index -= multiDimIndex[j] * extractedStrides[j];
+ }
+ // compute the corresponding linearized index in the source vector
+ // i.e. shift the multiDimIndex by the offsets
+ int64_t linearizedIndex = 0;
+ for (int64_t j = 0; j < k; ++j) {
+ linearizedIndex +=
+ (offsets[j].cast<IntegerAttr>().getInt() + multiDimIndex[j]) *
+ sourceStrides[j];
+ }
+ // fill the indices array form linearizedIndex to linearizedIndex +
+ // sliceLen
+ for (int64_t j = 0; j < extractSliceLen; ++j) {
+ indices[i * extractSliceLen + j] = linearizedIndex + j;
+ }
+ }
+ // perform a shuffle to extract the kD vector
+ rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
+ extractOp, dstType, srcVector, srcVector,
+ rewriter.getI64ArrayAttr(indices));
+
+ return success();
+ }
+
+private:
+ unsigned targetVectorBitWidth;
+};
+
+struct LinearizeVectorShffle final
+ : public OpConversionPattern<vector::ShuffleOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LinearizeVectorShffle(
+ const TypeConverter &typeConverter, MLIRContext *context,
+ unsigned targetVectBitWidth = std::numeric_limits<unsigned>::max(),
+ PatternBenefit benefit = 1)
+ : OpConversionPattern(typeConverter, context, benefit),
+ targetVectorBitWidth(targetVectBitWidth) {}
+
+ LogicalResult
+ matchAndRewrite(vector::ShuffleOp shuffleOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto dstType = getTypeConverter()->convertType(shuffleOp.getType());
+ auto loc = shuffleOp.getLoc();
+ if (!dstType)
+ return rewriter.notifyMatchFailure(loc, "cannot convert type.");
+
+ if (shuffleOp.getV1VectorType().isScalable() ||
+ shuffleOp.getV2VectorType().isScalable() ||
+ dstType.cast<VectorType>().isScalable())
+ return rewriter.notifyMatchFailure(loc,
+ "scalable vectors are not supported.");
+ if (!isLessThanTargetBitWidth(shuffleOp, targetVectorBitWidth))
+ return rewriter.notifyMatchFailure(
+ shuffleOp, "Can't flatten since targetBitWidth <= OpSize");
+
+ auto vec1 = adaptor.getV1();
+ auto vec2 = adaptor.getV2();
+
+ int shuffleSliceLen = 1;
+ int rank = shuffleOp.getV1().getType().getRank();
+
+ // if rank > 1, we need to do the shuffle in the granularity of slices
+ // instead of scalars. Size of the slice is equal to the rank-1 innermost
+ // dims. Mask of the shuffle op specifies which slice to take from the
+ // outermost dim.
+ if (rank > 1) {
+ auto shape = shuffleOp.getV1().getType().getShape();
+ for (unsigned i = 1; i < shape.size(); i++) {
+ shuffleSliceLen *= shape[i];
+ }
+ }
+
+ auto mask = shuffleOp.getMask();
+ auto totalSize = mask.size() * shuffleSliceLen;
+
+ llvm::SmallVector<int64_t, 2> indices(totalSize);
+ for (auto [i, value] :
+ llvm::enumerate(mask.getAsValueRange<IntegerAttr>())) {
+
+ int64_t v = value.getZExtValue();
+ std::iota(indices.begin() + shuffleSliceLen * i,
+ indices.begin() + shuffleSliceLen * (i + 1),
+ shuffleSliceLen * v);
+ }
+
+ rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
+ shuffleOp, dstType, vec1, vec2, rewriter.getI64ArrayAttr(indices));
+
+ return success();
+ }
+
+private:
+ unsigned targetVectorBitWidth;
+};
+
+struct LinearizeVectorExtract final
+ : public OpConversionPattern<vector::ExtractOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LinearizeVectorExtract(
+ const TypeConverter &typeConverter, MLIRContext *context,
+ unsigned targetVectBitWidth = std::numeric_limits<unsigned>::max(),
+ PatternBenefit benefit = 1)
+ : OpConversionPattern(typeConverter, context, benefit),
+ targetVectorBitWidth(targetVectBitWidth) {}
+ LogicalResult
+ matchAndRewrite(vector::ExtractOp extractOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto dstTy = getTypeConverter()->convertType(extractOp.getType());
+ if (!dstTy)
+ return rewriter.notifyMatchFailure(extractOp, "cannot convert type.");
+
+ if (extractOp.getVector().getType().isScalable() ||
+ dstTy.cast<VectorType>().isScalable())
+ return rewriter.notifyMatchFailure(extractOp,
+ "scalable vectors are not supported.");
+ if (!isLessThanTargetBitWidth(extractOp, targetVectorBitWidth))
+ return rewriter.notifyMatchFailure(
+ extractOp, "Can't flatten since targetBitWidth <= OpSize");
+
+ // dynamic position is not supported
+ if (extractOp.hasDynamicPosition())
+ return rewriter.notifyMatchFailure(extractOp,
+ "dynamic position is not supported.");
+
+ auto shape = extractOp.getVector().getType().getShape();
+ auto size = extractOp.getVector().getType().getNumElements();
+
+ // compute linearized offset
+ int64_t linearizedOffset = 0;
+ auto offsets = extractOp.getStaticPosition();
+ for (auto [i, off] : llvm::enumerate(offsets)) {
+ size /= shape[i];
+ linearizedOffset += offsets[i] * size;
+ }
+
+ llvm::SmallVector<int64_t, 2> indices(size);
+ std::iota(indices.begin(), indices.end(), linearizedOffset);
+ rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
+ extractOp, dstTy, adaptor.getVector(), adaptor.getVector(),
+ rewriter.getI64ArrayAttr(indices));
+
+ return success();
+ }
+
private:
unsigned targetVectorBitWidth;
};
@@ -139,9 +369,26 @@ void mlir::vector::populateVectorLinearizeTypeConversionsAndLegality(
? typeConverter.isLegal(op)
: true);
}
+ if (isa<vector::ShuffleOp>(op)) {
+ return (isLessThanTargetBitWidth(op, targetBitWidth)
+ ? (typeConverter.isLegal(op) &&
+ op->getResult(0)
+ .getType()
+ .cast<mlir::VectorType>()
+ .getRank() == 1)
+ : true);
+ }
return std::nullopt;
});
- patterns.add<LinearizeConstant, LinearizeVectorizable>(
+ // target.addDynamicallyLegalOp<mlir::vector::ShuffleOp>(
+ // [=](mlir::Operation *op) {
+ // return op->getResult(0).getType().cast<mlir::VectorType>().getRank()
+ // ==
+ // 1;
+ // });
+
+ patterns.add<LinearizeConstant, LinearizeVectorizable, LinearizeVectorShffle,
+ LinearizeVectorExtract, LinearizeVectorExtractStridedSlice>(
typeConverter, patterns.getContext(), targetBitWidth);
}
diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir
index 212541c79565b6..d4215a88977eb7 100644
--- a/mlir/test/Dialect/Vector/linearize.mlir
+++ b/mlir/test/Dialect/Vector/linearize.mlir
@@ -164,3 +164,83 @@ func.func @test_scalable_no_linearize(%arg0: vector<2x[2]xf32>) -> vector<2x[2]x
return %2 : vector<2x[2]xf32>
}
+
+// -----
+// ALL-LABEL: test_extract_strided_slice_1
+// ALL-SAME: (%[[ORIG_ARG:.*]]: vector<4x8xf32>) -> vector<2x2xf32> {
+func.func @test_extract_strided_slice_1(%arg0 : vector<4x8xf32>) -> vector<2x2xf32> {
+ // DEFAULT: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<4x8xf32> to vector<32xf32>
+ // DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
+ // DEFAULT: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
+ // DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<4xf32> to vector<2x2xf32>
+ // DEFAULT: return %[[RES]] : vector<2x2xf32>
+
+ // BW-128: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<4x8xf32> to vector<32xf32>
+ // BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
+ // BW-128: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
+ // BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<4xf32> to vector<2x2xf32>
+ // BW-128: return %[[RES]] : vector<2x2xf32>
+ %0 = vector.extract_strided_slice %arg0 { sizes = [2, 2], strides = [1, 1], offsets = [0, 4]}
+ : vector<4x8xf32> to vector<2x2xf32>
+ return %0 : vector<2x2xf32>
+}
+
+// -----
+// ALL-LABEL: test_extract_strided_slice_2
+// ALL-SAME: (%[[ORIG_ARG:.*]]: vector<2x8x2xf32>) -> vector<1x4x2xf32> {
+func.func @test_extract_strided_slice_2(%arg0 : vector<2x8x2xf32>) -> vector<1x4x2xf32> {
+ // DEFAULT: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
+ // DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
+ // DEFAULT: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
+ // DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<8xf32> to vector<1x4x2xf32>
+ // DEFAULT: return %[[RES]] : vector<1x4x2xf32>
+
+ // BW-128: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
+ // BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
+ // BW-128: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
+ // BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<8xf32> to vector<1x4x2xf32>
+ // BW-128: return %[[RES]] : vector<1x4x2xf32>
+ %0 = vector.extract_strided_slice %arg0 { offsets = [1, 2], strides = [1, 1], sizes = [1, 4] }
+ : vector<2x8x2xf32> to vector<1x4x2xf32>
+ return %0 : vector<1x4x2xf32>
+}
+
+// -----
+// ALL-LABEL: test_vector_shuffle
+// ALL-SAME: (%[[ORIG_ARG0:.*]]: vector<4x2xf32>, %[[ORIG_ARG1:.*]]: vector<4x2xf32>) -> vector<8x2xf32> {
+func.func @test_vector_shuffle(%arg0: vector<4x2xf32>, %arg1: vector<4x2xf32>) -> vector<8x2xf32> {
+ // DEFAULT: %[[ARG0:.*]] = vector.shape_cast %[[ORIG_ARG0]] : vector<4x2xf32> to vector<8xf32>
+ // DEFAULT: %[[ARG1:.*]] = vector.shape_cast %[[ORIG_ARG1]] : vector<4x2xf32> to vector<8xf32>
+ // DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG0]], %[[ARG1]]
+ // DEFAULT: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
+ // DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
+ // DEFAULT: return %[[RES]] : vector<8x2xf32>
+
+ // BW-128: %[[ARG0:.*]] = vector.shape_cast %[[ORIG_ARG0]] : vector<4x2xf32> to vector<8xf32>
+ // BW-128: %[[ARG1:.*]] = vector.shape_cast %[[ORIG_ARG1]] : vector<4x2xf32> to vector<8xf32>
+ // BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG0]], %[[ARG1]]
+ // BW-128: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
+ // BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
+ // BW-128: return %[[RES]] : vector<8x2xf32>
+ %0 = vector.shuffle %arg0, %arg1 [0, 4, 1, 5, 2, 6, 3, 7] : vector<4x2xf32>, vector<4x2xf32>
+ return %0 : vector<8x2xf32>
+}
+
+// -----
+// ALL-LABEL: test_vector_extract
+// ALL-SAME: (%[[ORIG_ARG:.*]]: vector<2x8x2xf32>) -> vector<8x2xf32> {
+func.func @test_vector_extract(%arg0: vector<2x8x2xf32>) -> vector<8x2xf32> {
+ // DEFAULT: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
+ // DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
+ // DEFAULT: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
+ // DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
+ // DEFAULT: return %[[RES]] : vector<8x2xf32>
+
+ // BW-128: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
+ // BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
+ // BW-128: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
+ // BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
+ // BW-128: return %[[RES]] : vector<8x2xf32>
+ %0 = vector.extract %arg0[1]: vector<8x2xf32> from vector<2x8x2xf32>
+ return %0 : vector<8x2xf32>
+}
>From de748c0f93e1ead19c5cc402940c9df8ab180b2d Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 9 Apr 2024 14:59:06 -0700
Subject: [PATCH 2/8] remove comments
---
mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp | 7 -------
1 file changed, 7 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index 257c940e5ed93c..e5157abd245b5d 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -381,13 +381,6 @@ void mlir::vector::populateVectorLinearizeTypeConversionsAndLegality(
return std::nullopt;
});
- // target.addDynamicallyLegalOp<mlir::vector::ShuffleOp>(
- // [=](mlir::Operation *op) {
- // return op->getResult(0).getType().cast<mlir::VectorType>().getRank()
- // ==
- // 1;
- // });
-
patterns.add<LinearizeConstant, LinearizeVectorizable, LinearizeVectorShffle,
LinearizeVectorExtract, LinearizeVectorExtractStridedSlice>(
typeConverter, patterns.getContext(), targetBitWidth);
>From 962243c475e9f4b2b4fc1231edf92bc06ec12767 Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 9 Apr 2024 15:21:22 -0700
Subject: [PATCH 3/8] fix test
---
mlir/test/Dialect/Vector/linearize.mlir | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir
index 88d011e7c8594c..67f0f667a6b205 100644
--- a/mlir/test/Dialect/Vector/linearize.mlir
+++ b/mlir/test/Dialect/Vector/linearize.mlir
@@ -169,6 +169,9 @@ func.func @test_extract_strided_slice_1(%arg0 : vector<4x8xf32>) -> vector<2x2xf
// BW-128: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<4xf32> to vector<2x2xf32>
// BW-128: return %[[RES]] : vector<2x2xf32>
+
+ // BW-0: %[[RES:.*]] = vector.extract_strided_slice %[[ARG:.*]] {offsets = [0, 4], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32>
+ // BW-0: return %[[RES]] : vector<2x2xf32>
%0 = vector.extract_strided_slice %arg0 { sizes = [2, 2], strides = [1, 1], offsets = [0, 4]}
: vector<4x8xf32> to vector<2x2xf32>
return %0 : vector<2x2xf32>
@@ -189,6 +192,9 @@ func.func @test_extract_strided_slice_2(%arg0 : vector<2x8x2xf32>) -> vector<1x4
// BW-128: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<8xf32> to vector<1x4x2xf32>
// BW-128: return %[[RES]] : vector<1x4x2xf32>
+
+ // BW-0: %[[RES:.*]] = vector.extract_strided_slice %[[ORIG_ARG]] {offsets = [1, 2], sizes = [1, 4], strides = [1, 1]} : vector<2x8x2xf32> to vector<1x4x2xf32>
+ // BW-0: return %[[RES]] : vector<1x4x2xf32>
%0 = vector.extract_strided_slice %arg0 { offsets = [1, 2], strides = [1, 1], sizes = [1, 4] }
: vector<2x8x2xf32> to vector<1x4x2xf32>
return %0 : vector<1x4x2xf32>
@@ -211,6 +217,9 @@ func.func @test_vector_shuffle(%arg0: vector<4x2xf32>, %arg1: vector<4x2xf32>) -
// BW-128: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
// BW-128: return %[[RES]] : vector<8x2xf32>
+
+ // BW-0: %[[RES:.*]] = vector.shuffle %[[ORIG_ARG0]], %[[ORIG_ARG1]] [0, 4, 1, 5, 2, 6, 3, 7] : vector<4x2xf32>, vector<4x2xf32>
+ // BW-0: return %[[RES]] : vector<8x2xf32>
%0 = vector.shuffle %arg0, %arg1 [0, 4, 1, 5, 2, 6, 3, 7] : vector<4x2xf32>, vector<4x2xf32>
return %0 : vector<8x2xf32>
}
@@ -230,6 +239,9 @@ func.func @test_vector_extract(%arg0: vector<2x8x2xf32>) -> vector<8x2xf32> {
// BW-128: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
// BW-128: return %[[RES]] : vector<8x2xf32>
+
+ // BW-0: %[[RES:.*]] = vector.extract %[[ORIG_ARG]][1] : vector<8x2xf32> from vector<2x8x2xf32>
+ // BW-0: return %[[RES]] : vector<8x2xf32>
%0 = vector.extract %arg0[1]: vector<8x2xf32> from vector<2x8x2xf32>
return %0 : vector<8x2xf32>
}
>From e20be009e4b7e9abdc6acd09de44d066abcd4a30 Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 12 Apr 2024 12:22:24 -0700
Subject: [PATCH 4/8] address comments
---
.../Vector/Transforms/VectorLinearize.cpp | 34 +++++++++++--------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index e5157abd245b5d..c85f8ecf825090 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -109,6 +109,9 @@ struct LinearizeVectorizable final
unsigned targetVectorBitWidth;
};
+
+/// This pattern converts the vector.extract_strided_slice operation to a
+/// vector.shuffle operation that works on a linearized vector.
struct LinearizeVectorExtractStridedSlice final
: public mlir::OpConversionPattern<mlir::vector::ExtractStridedSliceOp> {
using OpConversionPattern::OpConversionPattern;
@@ -137,18 +140,16 @@ struct LinearizeVectorExtractStridedSlice final
auto offsets = extractOp.getOffsets().getValue();
auto sizes = extractOp.getSizes().getValue();
auto strides = extractOp.getStrides().getValue();
-
if (!isConstantIntValue(strides[0], 1))
return rewriter.notifyMatchFailure(
extractOp, "Strided slice with stride != 1 is not supported.");
-
Value srcVector = adaptor.getVector();
-
// if kD offsets are specified for nd source vector (n > k), the granularity
// of the extraction is greater than 1. In this case last (n-k) dimensions
- // form the extraction granularity. example : %0 =
- // vector.extract_strided_slice %src { offsets = [0, 0], sizes = [2, 2],
- // strides = [1, 1]} : vector<4x8x8xf32> to vector<2x2x8xf32>
+ // form the extraction granularity.
+ // example :
+ // %0 = vector.extract_strided_slice %src { offsets = [0, 0], sizes = [2, 2],
+ // strides = [1, 1]} : vector<4x8x8xf32> to vector<2x2x8xf32>
// here, extraction granularity is 8.
int64_t extractSliceLen = 1;
auto n = extractOp.getSourceVectorType().getRank();
@@ -158,13 +159,11 @@ struct LinearizeVectorExtractStridedSlice final
extractSliceLen *= extractOp.getSourceVectorType().getShape()[i + k];
}
}
-
// get total number of extracted slices
int64_t nExtractedSlices = 1;
for (auto size : sizes) {
nExtractedSlices *= size.cast<IntegerAttr>().getInt();
}
-
// compute the strides of the source vector considering first k dimensions
llvm::SmallVector<int64_t, 4> sourceStrides(k, extractSliceLen);
for (int i = k - 2; i >= 0; --i) {
@@ -209,7 +208,6 @@ struct LinearizeVectorExtractStridedSlice final
rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
extractOp, dstType, srcVector, srcVector,
rewriter.getI64ArrayAttr(indices));
-
return success();
}
@@ -217,6 +215,9 @@ struct LinearizeVectorExtractStridedSlice final
unsigned targetVectorBitWidth;
};
+
+/// This pattern converts the vector.shuffle operation that works on nD (n > 1)
+/// vectors to a vector.shuffle operation that works on linearized vectors.
struct LinearizeVectorShffle final
: public OpConversionPattern<vector::ShuffleOp> {
using OpConversionPattern::OpConversionPattern;
@@ -234,7 +235,6 @@ struct LinearizeVectorShffle final
auto loc = shuffleOp.getLoc();
if (!dstType)
return rewriter.notifyMatchFailure(loc, "cannot convert type.");
-
if (shuffleOp.getV1VectorType().isScalable() ||
shuffleOp.getV2VectorType().isScalable() ||
dstType.cast<VectorType>().isScalable())
@@ -246,7 +246,6 @@ struct LinearizeVectorShffle final
auto vec1 = adaptor.getV1();
auto vec2 = adaptor.getV2();
-
int shuffleSliceLen = 1;
int rank = shuffleOp.getV1().getType().getRank();
@@ -261,10 +260,13 @@ struct LinearizeVectorShffle final
}
}
+ // for each value in the mask, we generate the indices of the source vectors
+ // that needs to be shuffled to the destination vector. if shuffleSliceLen > 1
+ // we need to shuffle the slices (consecutive shuffleSliceLen number of elements)
+ // instead of scalars.
auto mask = shuffleOp.getMask();
- auto totalSize = mask.size() * shuffleSliceLen;
-
- llvm::SmallVector<int64_t, 2> indices(totalSize);
+ auto totalSizeOfShuffledElmnts = mask.size() * shuffleSliceLen;
+ llvm::SmallVector<int64_t, 2> indices(totalSizeOfShuffledElmnts);
for (auto [i, value] :
llvm::enumerate(mask.getAsValueRange<IntegerAttr>())) {
@@ -276,7 +278,6 @@ struct LinearizeVectorShffle final
rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
shuffleOp, dstType, vec1, vec2, rewriter.getI64ArrayAttr(indices));
-
return success();
}
@@ -284,6 +285,9 @@ struct LinearizeVectorShffle final
unsigned targetVectorBitWidth;
};
+
+/// This pattern converts the vector.extract operation to a vector.shuffle operation
+/// that works on a linearized vector.
struct LinearizeVectorExtract final
: public OpConversionPattern<vector::ExtractOp> {
using OpConversionPattern::OpConversionPattern;
>From 0d9406df3f522c6a57fae495d78c798baed3c419 Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 12 Apr 2024 16:02:20 -0700
Subject: [PATCH 5/8] address comments
---
.../Vector/Transforms/VectorRewritePatterns.h | 6 ++
.../Vector/Transforms/VectorLinearize.cpp | 80 +++++++++++--------
.../Dialect/Vector/TestVectorTransforms.cpp | 2 +
3 files changed, 53 insertions(+), 35 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h
index 453fa73429dd1a..d630a3562beb94 100644
--- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h
+++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h
@@ -389,6 +389,12 @@ void populateVectorLinearizeTypeConversionsAndLegality(
TypeConverter &typeConverter, RewritePatternSet &patterns,
ConversionTarget &target, unsigned targetBitWidth);
+/// Populates patterns for linearizing ND (N >= 2) vector operations to 1D
+/// vector shuffle operations.
+void populateVectorLinearizeToShuffleRewritePatterns(
+ TypeConverter &typeConverter, RewritePatternSet &patterns,
+ ConversionTarget &target, unsigned targetBitWidth);
+
} // namespace vector
} // namespace mlir
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index c85f8ecf825090..08f6f03eb56ac8 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -17,6 +17,7 @@
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/DialectConversion.h"
+#include <cstdint>
#include <numeric>
using namespace mlir;
@@ -109,7 +110,6 @@ struct LinearizeVectorizable final
unsigned targetVectorBitWidth;
};
-
/// This pattern converts the vector.extract_strided_slice operation to a
/// vector.shuffle operation that works on a linearized vector.
struct LinearizeVectorExtractStridedSlice final
@@ -144,67 +144,68 @@ struct LinearizeVectorExtractStridedSlice final
return rewriter.notifyMatchFailure(
extractOp, "Strided slice with stride != 1 is not supported.");
Value srcVector = adaptor.getVector();
- // if kD offsets are specified for nd source vector (n > k), the granularity
+ // If kD offsets are specified for nd source vector (n > k), the granularity
// of the extraction is greater than 1. In this case last (n-k) dimensions
- // form the extraction granularity.
- // example :
- // %0 = vector.extract_strided_slice %src { offsets = [0, 0], sizes = [2, 2],
+ // form the extraction granularity.
+ // example :
+ // %0 = vector.extract_strided_slice %src { offsets = [0, 0],
+ // sizes = [2, 2],
// strides = [1, 1]} : vector<4x8x8xf32> to vector<2x2x8xf32>
// here, extraction granularity is 8.
int64_t extractSliceLen = 1;
auto n = extractOp.getSourceVectorType().getRank();
- auto k = (int64_t)offsets.size();
+ int64_t k = (int64_t)offsets.size();
if (n > k) {
for (unsigned i = 0; i < n - k; i++) {
extractSliceLen *= extractOp.getSourceVectorType().getShape()[i + k];
}
}
- // get total number of extracted slices
+ // Get total number of extracted slices.
int64_t nExtractedSlices = 1;
for (auto size : sizes) {
nExtractedSlices *= size.cast<IntegerAttr>().getInt();
}
- // compute the strides of the source vector considering first k dimensions
+ // Compute the strides of the source vector considering first k dimensions.
llvm::SmallVector<int64_t, 4> sourceStrides(k, extractSliceLen);
for (int i = k - 2; i >= 0; --i) {
sourceStrides[i] = sourceStrides[i + 1] *
extractOp.getSourceVectorType().getShape()[i + 1];
}
- // final shuffle indices has nExtractedElems * extractSliceLen elements
+ // Final shuffle indices has nExtractedSlices * extractSliceLen elements.
llvm::SmallVector<int64_t, 4> indices(nExtractedSlices * extractSliceLen);
- // compute the strides of the extracted kD vector
+ // Compute the strides of the extracted kD vector.
llvm::SmallVector<int64_t, 4> extractedStrides(k, 1);
- // compute extractedStrides
+ // Compute extractedStrides.
for (int i = k - 2; i >= 0; --i) {
extractedStrides[i] =
extractedStrides[i + 1] * sizes[i + 1].cast<IntegerAttr>().getInt();
}
- // iterate over all extracted slices from 0 to nExtractedElems-1
+ // Iterate over all extracted slices from 0 to nExtractedSlices-1
// and compute the multi-dimensional index and the corresponding linearized
- // index within the source vector
+ // index within the source vector.
for (int64_t i = 0; i < nExtractedSlices; ++i) {
int64_t index = i;
- // compute the corresponding multi-dimensional index
+ // Compute the corresponding multi-dimensional index.
llvm::SmallVector<int64_t, 4> multiDimIndex(k, 0);
for (int64_t j = 0; j < k; ++j) {
multiDimIndex[j] = (index / extractedStrides[j]);
index -= multiDimIndex[j] * extractedStrides[j];
}
- // compute the corresponding linearized index in the source vector
- // i.e. shift the multiDimIndex by the offsets
+ // Compute the corresponding linearized index in the source vector
+ // i.e. shift the multiDimIndex by the offsets.
int64_t linearizedIndex = 0;
for (int64_t j = 0; j < k; ++j) {
linearizedIndex +=
(offsets[j].cast<IntegerAttr>().getInt() + multiDimIndex[j]) *
sourceStrides[j];
}
- // fill the indices array form linearizedIndex to linearizedIndex +
- // sliceLen
+ // Fill the indices array from linearizedIndex to linearizedIndex +
+ // sliceLen.
for (int64_t j = 0; j < extractSliceLen; ++j) {
indices[i * extractSliceLen + j] = linearizedIndex + j;
}
}
- // perform a shuffle to extract the kD vector
+ // Perform a shuffle to extract the kD vector.
rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
extractOp, dstType, srcVector, srcVector,
rewriter.getI64ArrayAttr(indices));
@@ -215,13 +216,12 @@ struct LinearizeVectorExtractStridedSlice final
unsigned targetVectorBitWidth;
};
-
/// This pattern converts the vector.shuffle operation that works on nD (n > 1)
/// vectors to a vector.shuffle operation that works on linearized vectors.
-struct LinearizeVectorShffle final
+struct LinearizeVectorShuffle final
: public OpConversionPattern<vector::ShuffleOp> {
using OpConversionPattern::OpConversionPattern;
- LinearizeVectorShffle(
+ LinearizeVectorShuffle(
const TypeConverter &typeConverter, MLIRContext *context,
unsigned targetVectBitWidth = std::numeric_limits<unsigned>::max(),
PatternBenefit benefit = 1)
@@ -249,7 +249,7 @@ struct LinearizeVectorShffle final
int shuffleSliceLen = 1;
int rank = shuffleOp.getV1().getType().getRank();
- // if rank > 1, we need to do the shuffle in the granularity of slices
+ // If rank > 1, we need to do the shuffle in the granularity of slices
// instead of scalars. Size of the slice is equal to the rank-1 innermost
// dims. Mask of the shuffle op specifies which slice to take from the
// outermost dim.
@@ -260,10 +260,10 @@ struct LinearizeVectorShffle final
}
}
- // for each value in the mask, we generate the indices of the source vectors
- // that needs to be shuffled to the destination vector. if shuffleSliceLen > 1
- // we need to shuffle the slices (consecutive shuffleSliceLen number of elements)
- // instead of scalars.
+ // For each value in the mask, we generate the indices of the source vectors
+ // that need to be shuffled to the destination vector. If shuffleSliceLen >
+ // 1 we need to shuffle the slices (consecutive shuffleSliceLen number of
+ // elements) instead of scalars.
auto mask = shuffleOp.getMask();
auto totalSizeOfShuffledElmnts = mask.size() * shuffleSliceLen;
llvm::SmallVector<int64_t, 2> indices(totalSizeOfShuffledElmnts);
@@ -285,9 +285,8 @@ struct LinearizeVectorShffle final
unsigned targetVectorBitWidth;
};
-
-/// This pattern converts the vector.extract operation to a vector.shuffle operation
-/// that works on a linearized vector.
+/// This pattern converts the vector.extract operation to a vector.shuffle
+/// operation that works on a linearized vector.
struct LinearizeVectorExtract final
: public OpConversionPattern<vector::ExtractOp> {
using OpConversionPattern::OpConversionPattern;
@@ -312,7 +311,7 @@ struct LinearizeVectorExtract final
return rewriter.notifyMatchFailure(
extractOp, "Can't flatten since targetBitWidth <= OpSize");
- // dynamic position is not supported
+ // Dynamic position is not supported.
if (extractOp.hasDynamicPosition())
return rewriter.notifyMatchFailure(extractOp,
"dynamic position is not supported.");
@@ -320,7 +319,7 @@ struct LinearizeVectorExtract final
auto shape = extractOp.getVector().getType().getShape();
auto size = extractOp.getVector().getType().getNumElements();
- // compute linearized offset
+ // Compute linearized offset.
int64_t linearizedOffset = 0;
auto offsets = extractOp.getStaticPosition();
for (auto [i, off] : llvm::enumerate(offsets)) {
@@ -373,6 +372,18 @@ void mlir::vector::populateVectorLinearizeTypeConversionsAndLegality(
? typeConverter.isLegal(op)
: true);
}
+ return std::nullopt;
+ });
+
+ patterns.add<LinearizeConstant, LinearizeVectorizable>(
+ typeConverter, patterns.getContext(), targetBitWidth);
+}
+
+void mlir::vector::populateVectorLinearizeToShuffleRewritePatterns(
+ TypeConverter &typeConverter, RewritePatternSet &patterns,
+ ConversionTarget &target, unsigned int targetBitWidth) {
+ target.markUnknownOpDynamicallyLegal(
+ [=](Operation *op) -> std::optional<bool> {
if (isa<vector::ShuffleOp>(op)) {
return (isLessThanTargetBitWidth(op, targetBitWidth)
? (typeConverter.isLegal(op) &&
@@ -384,8 +395,7 @@ void mlir::vector::populateVectorLinearizeTypeConversionsAndLegality(
}
return std::nullopt;
});
-
- patterns.add<LinearizeConstant, LinearizeVectorizable, LinearizeVectorShffle,
- LinearizeVectorExtract, LinearizeVectorExtractStridedSlice>(
+ patterns.add<LinearizeVectorShuffle, LinearizeVectorExtract,
+ LinearizeVectorExtractStridedSlice>(
typeConverter, patterns.getContext(), targetBitWidth);
}
diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
index 00622599910567..e29ea9ce10c68d 100644
--- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
+++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp
@@ -867,6 +867,8 @@ struct TestVectorLinearize final
vector::populateVectorLinearizeTypeConversionsAndLegality(
typeConverter, patterns, target, targetVectorBitwidth);
+ vector::populateVectorLinearizeToShuffleRewritePatterns(
+ typeConverter, patterns, target, targetVectorBitwidth);
if (failed(applyPartialConversion(getOperation(), target,
std::move(patterns))))
return signalPassFailure();
>From e807c7a09f132d2a6bb6d8205b34d708e7129e1d Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 12 Apr 2024 16:14:04 -0700
Subject: [PATCH 6/8] fix tests
---
mlir/test/Dialect/Vector/linearize.mlir | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir
index 67f0f667a6b205..b29ceab5783d7a 100644
--- a/mlir/test/Dialect/Vector/linearize.mlir
+++ b/mlir/test/Dialect/Vector/linearize.mlir
@@ -160,13 +160,13 @@ func.func @test_0d_vector() -> vector<f32> {
func.func @test_extract_strided_slice_1(%arg0 : vector<4x8xf32>) -> vector<2x2xf32> {
// DEFAULT: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<4x8xf32> to vector<32xf32>
// DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
- // DEFAULT: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
+ // DEFAULT-SAME: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
// DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<4xf32> to vector<2x2xf32>
// DEFAULT: return %[[RES]] : vector<2x2xf32
// BW-128: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<4x8xf32> to vector<32xf32>
// BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
- // BW-128: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
+ // BW-128-SAME: [4, 5, 12, 13] : vector<32xf32>, vector<32xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<4xf32> to vector<2x2xf32>
// BW-128: return %[[RES]] : vector<2x2xf32>
@@ -183,13 +183,13 @@ func.func @test_extract_strided_slice_1(%arg0 : vector<4x8xf32>) -> vector<2x2xf
func.func @test_extract_strided_slice_2(%arg0 : vector<2x8x2xf32>) -> vector<1x4x2xf32> {
// DEFAULT: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
// DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
- // DEFAULT: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
+ // DEFAULT-SAME: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
// DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<8xf32> to vector<1x4x2xf32>
// DEFAULT: return %[[RES]] : vector<1x4x2xf32>
// BW-128: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
// BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
- // BW-128: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
+ // BW-128-SAME: [20, 21, 22, 23, 24, 25, 26, 27] : vector<32xf32>, vector<32xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<8xf32> to vector<1x4x2xf32>
// BW-128: return %[[RES]] : vector<1x4x2xf32>
@@ -207,14 +207,14 @@ func.func @test_vector_shuffle(%arg0: vector<4x2xf32>, %arg1: vector<4x2xf32>) -
// DEFAULT: %[[ARG0:.*]] = vector.shape_cast %[[ORIG_ARG0]] : vector<4x2xf32> to vector<8xf32>
// DEFAULT: %[[ARG1:.*]] = vector.shape_cast %[[ORIG_ARG1]] : vector<4x2xf32> to vector<8xf32>
// DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG0]], %[[ARG1]]
- // DEFAULT: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
+ // DEFAULT-SAME: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
// DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
// DEFAULT: return %[[RES]] : vector<8x2xf32>
// BW-128: %[[ARG0:.*]] = vector.shape_cast %[[ORIG_ARG0]] : vector<4x2xf32> to vector<8xf32>
// BW-128: %[[ARG1:.*]] = vector.shape_cast %[[ORIG_ARG1]] : vector<4x2xf32> to vector<8xf32>
// BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG0]], %[[ARG1]]
- // BW-128: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
+ // BW-128-SAME: [0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15] : vector<8xf32>, vector<8xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
// BW-128: return %[[RES]] : vector<8x2xf32>
@@ -230,13 +230,13 @@ func.func @test_vector_shuffle(%arg0: vector<4x2xf32>, %arg1: vector<4x2xf32>) -
func.func @test_vector_extract(%arg0: vector<2x8x2xf32>) -> vector<8x2xf32> {
// DEFAULT: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
// DEFAULT: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
- // DEFAULT: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
+ // DEFAULT-SAME: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
// DEFAULT: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
// DEFAULT: return %[[RES]] : vector<8x2xf32>
// BW-128: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32>
// BW-128: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]]
- // BW-128: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
+ // BW-128-SAME: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] : vector<32xf32>, vector<32xf32>
// BW-128: %[[RES:.*]] = vector.shape_cast %[[SHUFFLE]] : vector<16xf32> to vector<8x2xf32>
// BW-128: return %[[RES]] : vector<8x2xf32>
>From 92374a23a5a5d33530472f8673ba29e49c0205c5 Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 16 Apr 2024 11:24:13 -0700
Subject: [PATCH 7/8] fix
---
.../Vector/Transforms/VectorLinearize.cpp | 23 ++++++++-----------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index 08f6f03eb56ac8..90e8444dc4f3a4 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -129,10 +129,9 @@ struct LinearizeVectorExtractStridedSlice final
auto loc = extractOp.getLoc();
if (!dstType)
return rewriter.notifyMatchFailure(loc, "cannot convert type.");
- if (extractOp.getVector().getType().isScalable() ||
- dstType.cast<VectorType>().isScalable())
- return rewriter.notifyMatchFailure(loc,
- "scalable vectors are not supported.");
+ assert(!(extractOp.getVector().getType().isScalable() ||
+ dstType.cast<VectorType>().isScalable()) &&
+ "scalable vectors are not supported.");
if (!isLessThanTargetBitWidth(extractOp, targetVectorBitWidth))
return rewriter.notifyMatchFailure(
extractOp, "Can't flatten since targetBitWidth <= OpSize");
@@ -235,11 +234,10 @@ struct LinearizeVectorShuffle final
auto loc = shuffleOp.getLoc();
if (!dstType)
return rewriter.notifyMatchFailure(loc, "cannot convert type.");
- if (shuffleOp.getV1VectorType().isScalable() ||
- shuffleOp.getV2VectorType().isScalable() ||
- dstType.cast<VectorType>().isScalable())
- return rewriter.notifyMatchFailure(loc,
- "scalable vectors are not supported.");
+ assert(!(shuffleOp.getV1VectorType().isScalable() ||
+ shuffleOp.getV2VectorType().isScalable() ||
+ dstType.cast<VectorType>().isScalable()) &&
+ "scalable vectors are not supported.");
if (!isLessThanTargetBitWidth(shuffleOp, targetVectorBitWidth))
return rewriter.notifyMatchFailure(
shuffleOp, "Can't flatten since targetBitWidth <= OpSize");
@@ -303,10 +301,9 @@ struct LinearizeVectorExtract final
if (!dstTy)
return rewriter.notifyMatchFailure(extractOp, "cannot convert type.");
- if (extractOp.getVector().getType().isScalable() ||
- dstTy.cast<VectorType>().isScalable())
- return rewriter.notifyMatchFailure(extractOp,
- "scalable vectors are not supported.");
+ assert(!(extractOp.getVector().getType().isScalable() ||
+ dstTy.cast<VectorType>().isScalable()) &&
+ "scalable vectors are not supported.");
if (!isLessThanTargetBitWidth(extractOp, targetVectorBitWidth))
return rewriter.notifyMatchFailure(
extractOp, "Can't flatten since targetBitWidth <= OpSize");
>From c45c5076012ff13a3dd4920005f0a7d204666006 Mon Sep 17 00:00:00 2001
From: "Gusthinna Waduge, Charitha Saumya"
<charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 16 Apr 2024 11:59:37 -0700
Subject: [PATCH 8/8] add comments
---
.../Vector/Transforms/VectorLinearize.cpp | 31 +++++++++++++++++--
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index 90e8444dc4f3a4..b71a5c4b418f38 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -110,8 +110,17 @@ struct LinearizeVectorizable final
unsigned targetVectorBitWidth;
};
-/// This pattern converts the vector.extract_strided_slice operation to a
-/// vector.shuffle operation that works on a linearized vector.
+/// This pattern converts the ExtractStridedSliceOp into a ShuffleOp that works
+/// on a linearized vector.
+/// Following,
+/// vector.extract_strided_slice %source
+/// { offsets = [..], strides = [..], sizes = [..] }
+/// is converted to :
+/// %source_1d = vector.shape_cast %source
+/// %out_1d = vector.shuffle %source_1d, %source_1d [ shuffle_indices_1d ]
+/// %out_nd = vector.shape_cast %out_1d
+/// `shuffle_indices_1d` is computed using the offsets and sizes of the
+/// extraction.
struct LinearizeVectorExtractStridedSlice final
: public mlir::OpConversionPattern<mlir::vector::ExtractStridedSliceOp> {
using OpConversionPattern::OpConversionPattern;
@@ -150,7 +159,7 @@ struct LinearizeVectorExtractStridedSlice final
// %0 = vector.extract_strided_slice %src { offsets = [0, 0], sizes = [2,
// 2],
// strides = [1, 1]} : vector<4x8x8xf32> to vector<2x2x8xf32>
- // here, extraction granularity is 8.
+ // Here, extraction granularity is 8.
int64_t extractSliceLen = 1;
auto n = extractOp.getSourceVectorType().getRank();
int64_t k = (int64_t)offsets.size();
@@ -217,6 +226,15 @@ struct LinearizeVectorExtractStridedSlice final
/// This pattern converts the vector.shuffle operation that works on nD (n > 1)
/// vectors to a vector.shuffle operation that works on linearized vectors.
+/// Following,
+/// vector.shuffle %v1, %v2 [ shuffle_indices ]
+/// is converted to :
+/// %v1_1d = vector.shape_cast %v1
+/// %v2_1d = vector.shape_cast %v2
+/// %out_1d = vector.shuffle %v1_1d, %v2_1d [ shuffle_indices_1d ]
+/// %out_nd = vector.shape_cast %out_1d
+/// `shuffle_indices_1d` is computed using the sizes and `shuffle_indices`
+/// of the original shuffle operation.
struct LinearizeVectorShuffle final
: public OpConversionPattern<vector::ShuffleOp> {
using OpConversionPattern::OpConversionPattern;
@@ -285,6 +303,13 @@ struct LinearizeVectorShuffle final
/// This pattern converts the vector.extract operation to a vector.shuffle
/// operation that works on a linearized vector.
+/// Following,
+/// vector.extract %source [ position ]
+/// is converted to :
+/// %source_1d = vector.shape_cast %source
+/// %out_1d = vector.shuffle %source_1d, %source_1d [ shuffle_indices_1d ]
+/// %out_nd = vector.shape_cast %out_1d
+/// `shuffle_indices_1d` is computed using the position of the original extract.
struct LinearizeVectorExtract final
: public OpConversionPattern<vector::ExtractOp> {
using OpConversionPattern::OpConversionPattern;
More information about the Mlir-commits
mailing list