[Mlir-commits] [mlir] [mlir][vector] Add support for linearizing Extract, ExtractStridedSlice, Shuffle VectorOps in VectorLinearize (PR #88204)
Diego Caballero
llvmlistbot at llvm.org
Tue Apr 16 23:31:02 PDT 2024
================
@@ -103,6 +108,252 @@ struct LinearizeVectorizable final
return success();
}
+private:
+ unsigned targetVectorBitWidth;
+};
+
+/// Conversion pattern that rewrites a vector.extract_strided_slice into a
+/// single vector.shuffle that works on the linearized source vector.
+/// For example,
+/// vector.extract_strided_slice %source
+/// { offsets = [..], strides = [..], sizes = [..] }
+/// is converted to:
+/// %source_1d = vector.shape_cast %source
+/// %out_1d = vector.shuffle %source_1d, %source_1d [ shuffle_indices_1d ]
+/// %out_nd = vector.shape_cast %out_1d
+/// `shuffle_indices_1d` is computed from the offsets and sizes of the
+/// extraction; a match failure is reported when `strides[0]` is not 1.
+struct LinearizeVectorExtractStridedSlice final
+ : public mlir::OpConversionPattern<mlir::vector::ExtractStridedSliceOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LinearizeVectorExtractStridedSlice(
+ const TypeConverter &typeConverter, MLIRContext *context,
+ unsigned targetVectBitWidth = std::numeric_limits<unsigned>::max(),
+ PatternBenefit benefit = 1)
+ : OpConversionPattern(typeConverter, context, benefit),
+ targetVectorBitWidth(targetVectBitWidth) {}
+
+ LogicalResult
+ matchAndRewrite(vector::ExtractStridedSliceOp extractOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto dstType = getTypeConverter()->convertType(extractOp.getType()); // Result type produced by the type converter.
+ assert(!(extractOp.getVector().getType().isScalable() ||
+ dstType.cast<VectorType>().isScalable()) &&
+ "scalable vectors are not supported.");
+ if (!isLessThanTargetBitWidth(extractOp, targetVectorBitWidth))
+ return rewriter.notifyMatchFailure(
+ extractOp, "Can't flatten since targetBitWidth <= OpSize");
+
+ auto offsets = extractOp.getOffsets();
+ auto sizes = extractOp.getSizes();
+ auto strides = extractOp.getStrides();
+ if (!isConstantIntValue(strides[0], 1)) // NOTE(review): only strides[0] is inspected; the remaining strides are not checked here.
+ return rewriter.notifyMatchFailure(
+ extractOp, "Strided slice with stride != 1 is not supported.");
+ Value srcVector = adaptor.getVector(); // Source operand as supplied by the adaptor.
+ // If kD offsets are specified for an nD source vector (n > k), the
+ // granularity of the extraction is greater than 1. In this case the last
+ // (n-k) dimensions form the extraction granularity.
+ // Example:
+ // vector.extract_strided_slice %src {
+ // offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} :
+ // vector<4x8x8xf32> to vector<2x2x8xf32>
+ // Here, the extraction granularity is 8.
+ int64_t extractGranularitySize = 1;
+ auto n = extractOp.getSourceVectorType().getRank();
+ int64_t k = (int64_t)offsets.size();
+ if (n > k) {
+ for (unsigned i = 0; i < n - k; i++) {
+ extractGranularitySize *=
+ extractOp.getSourceVectorType().getShape()[i + k];
+ }
+ }
+ // Get the total number of extracted slices (product of all `sizes`).
+ int64_t nExtractedSlices = 1;
+ llvm::for_each(sizes, [&](Attribute size) {
+ nExtractedSlices *= size.cast<IntegerAttr>().getInt();
+ });
+ // Compute the source strides (in elements) of the first k dimensions.
+ llvm::SmallVector<int64_t, 4> sourceStrides(k, extractGranularitySize);
+ for (int i = k - 2; i >= 0; --i) {
+ sourceStrides[i] = sourceStrides[i + 1] *
+ extractOp.getSourceVectorType().getShape()[i + 1];
+ }
+ // The final shuffle mask has nExtractedSlices * extractGranularitySize
+ // elements.
+ llvm::SmallVector<int64_t, 4> indices(nExtractedSlices *
+ extractGranularitySize);
+ // Compute the strides of the extracted kD vector.
+ llvm::SmallVector<int64_t, 4> extractedStrides(k, 1);
+ // Innermost stride stays 1; each outer stride multiplies in sizes[i + 1].
+ for (int i = k - 2; i >= 0; --i) {
+ extractedStrides[i] =
+ extractedStrides[i + 1] * sizes[i + 1].cast<IntegerAttr>().getInt();
+ }
+ // Iterate over all extracted slices from 0 to nExtractedSlices - 1
+ // and compute the multi-dimensional index and the corresponding linearized
+ // index within the source vector.
+ for (int64_t i = 0; i < nExtractedSlices; ++i) {
+ int64_t index = i;
+ // Decompose `i` into a k-dimensional index using extractedStrides.
+ llvm::SmallVector<int64_t, 4> multiDimIndex(k, 0);
+ for (int64_t j = 0; j < k; ++j) {
+ multiDimIndex[j] = (index / extractedStrides[j]);
+ index -= multiDimIndex[j] * extractedStrides[j];
+ }
+ // Compute the corresponding linearized index in the source vector,
+ // i.e. shift the multiDimIndex by the offsets.
+ int64_t linearizedIndex = 0;
+ for (int64_t j = 0; j < k; ++j) {
+ linearizedIndex +=
+ (offsets[j].cast<IntegerAttr>().getInt() + multiDimIndex[j]) *
+ sourceStrides[j];
+ }
+ // Fill the indices array from linearizedIndex to linearizedIndex +
+ // extractGranularitySize.
+ for (int64_t j = 0; j < extractGranularitySize; ++j) {
+ indices[i * extractGranularitySize + j] = linearizedIndex + j;
+ }
+ }
+ // Replace the op with a shuffle of srcVector with itself using `indices`.
+ rewriter.replaceOpWithNewOp<vector::ShuffleOp>(
+ extractOp, dstType, srcVector, srcVector,
+ rewriter.getI64ArrayAttr(indices));
+ return success();
+ }
+
+private:
+ unsigned targetVectorBitWidth;
+};
+
+/// This pattern converts the ShuffleOp that works on nD (n > 1)
+/// vectors to a ShuffleOp that works on linearized vectors.
+/// Following,
+/// vector.shuffle %v1, %v2 [ shuffle_indices ]
+/// is converted to :
+/// %v1_1d = vector.shape_cast %v1
+/// %v2_1d = vector.shape_cast %v2
+/// %out_1d = vector.shuffle %v1_1d, %v2_1d [ shuffle_indices_1d ]
+/// %out_nd = vector.shape_cast %out_1d
+/// `shuffle_indices_1d` is computed using the sizes and `shuffle_indices`
+/// of the original shuffle operation.
+struct LinearizeVectorShuffle final
+ : public OpConversionPattern<vector::ShuffleOp> {
+ using OpConversionPattern::OpConversionPattern;
+ LinearizeVectorShuffle(
+ const TypeConverter &typeConverter, MLIRContext *context,
+ unsigned targetVectBitWidth = std::numeric_limits<unsigned>::max(),
+ PatternBenefit benefit = 1)
+ : OpConversionPattern(typeConverter, context, benefit),
+ targetVectorBitWidth(targetVectBitWidth) {}
+
+ LogicalResult
+ matchAndRewrite(vector::ShuffleOp shuffleOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ auto dstType = getTypeConverter()->convertType(shuffleOp.getType());
+ assert(!(shuffleOp.getV1VectorType().isScalable() ||
+ shuffleOp.getV2VectorType().isScalable() ||
+ dstType.cast<VectorType>().isScalable()) &&
+ "scalable vectors are not supported.");
+ if (!isLessThanTargetBitWidth(shuffleOp, targetVectorBitWidth))
+ return rewriter.notifyMatchFailure(
+ shuffleOp, "Can't flatten since targetBitWidth <= OpSize");
+
+ auto vec1 = adaptor.getV1();
+ auto vec2 = adaptor.getV2();
+ int shuffleSliceLen = 1;
+ int rank = shuffleOp.getV1().getType().getRank();
+
+ // If rank > 1, we need to do the shuffle in the granularity of slices
+ // instead of scalars. Size of the slice is equal to the rank-1 innermost
+ // dims. Mask of the shuffle op specifies which slice to take from the
+ // outermost dim.
+ if (rank > 1) {
+ auto shape = shuffleOp.getV1().getType().getShape();
+ for (unsigned i = 1; i < shape.size(); i++) {
----------------
dcaballe wrote:
Nit: please use pre-increment (`++i`) here.
https://github.com/llvm/llvm-project/pull/88204
More information about the Mlir-commits
mailing list