[Mlir-commits] [mlir] [mlir][vector]Enable DropUnitDimFromTransposeOp (PR #93007)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed May 22 02:14:36 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-vector
Author: Hugo Trachino (nujaa)
<details>
<summary>Changes</summary>
This MR is part of a list of MRs aiming to generalize `DropUnitDimFromElementwiseOps` for other ops.
This commit implements `DropUnitDimFromTransposeOp` to target `vector::TransposeOp`.
Discussed [here](https://discourse.llvm.org/t/on-improving-arm-sme-lowering-resilience-in-mlir/78543/17?u=nujaa).
---
Full diff: https://github.com/llvm/llvm-project/pull/93007.diff
2 Files Affected:
- (modified) mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp (+73-2)
- (modified) mlir/test/Dialect/Vector/vector-transfer-flatten.mlir (+74)
``````````diff
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index f29eba90c3ceb..fe74a6446ceed 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1695,6 +1695,77 @@ struct DropUnitDimFromElementwiseOps final
}
};
+
+/// Removes non-scalable unit dimensions from a vector.transpose. Generates a
+/// vector.shape_cast on the operand and result to match types.
+///
+/// Ex:
+/// ```
+///  %tr = vector.transpose %arg0, [3, 1, 2, 0]: vector<1x4x1x2xf32> to
+///                                              vector<2x4x1x1xf32>
+/// ```
+///
+/// gets converted to:
+///
+/// ```
+///  %sc0 = vector.shape_cast %arg0 : vector<1x4x1x2xf32> to vector<4x2xf32>
+///  %tr = vector.transpose %sc0, [1, 0] : vector<4x2xf32> to vector<2x4xf32>
+///  %sc1 = vector.shape_cast %tr : vector<2x4xf32> to vector<2x4x1x1xf32>
+/// ```
+struct DropUnitDimFromTransposeOp final
+    : public OpRewritePattern<vector::TransposeOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::TransposeOp transposeOp,
+                                PatternRewriter &rewriter) const override {
+    auto sourceVectorType = transposeOp.getSourceVectorType();
+    if (sourceVectorType.getRank() < 2)
+      return failure();
+
+    VectorType newVType = sourceVectorType;
+    SmallVector<int64_t> newPerm =
+        llvm::to_vector(transposeOp.getPermutation());
+    unsigned removedDims = 0;
+    auto shape = sourceVectorType.getShape();
+    for (const auto &dim : llvm::enumerate(shape)) {
+      // Only static unit dims can be dropped; scalable `[1]` dims must stay.
+      if (dim.value() != 1 || sourceVectorType.getScalableDims()[dim.index()])
+        continue;
+
+      // Position of this dim in `newVType`, i.e. after the earlier drops.
+      const int64_t droppedDim = dim.index() - removedDims;
+      newVType = VectorType::Builder(newVType).dropDim(droppedDim);
+
+      // Erase the dropped unit dimension from the permutation map.
+      llvm::erase(newPerm, droppedDim);
+      // Decrement all dimensions above the dropped one to keep the permutation
+      // map in range of the new rank.
+      for (int64_t &permDim : newPerm) {
+        if (permDim > droppedDim)
+          --permDim;
+      }
+      ++removedDims;
+    }
+    // No unit dim was dropped: there is nothing to rewrite.
+    if (!removedDims)
+      return failure();
+
+    auto loc = transposeOp->getLoc();
+    auto opSC = rewriter.create<vector::ShapeCastOp>(loc, newVType,
+                                                     transposeOp.getVector());
+    // Create an updated Transpose Op without unit dim.
+    vector::TransposeOp newTransposeOp =
+        rewriter.create<vector::TransposeOp>(loc, opSC, newPerm);
+
+    // Restore the unit dim by applying vector.shape_cast to the result.
+    rewriter.replaceOpWithNewOp<ShapeCastOp>(
+        transposeOp, transposeOp.getResultVectorType(), newTransposeOp);
+
+    // The IR was modified, so the rewrite must be reported as successful.
+    return success();
+  }
+};
+
/// Pattern to eliminate redundant zero-constants added to reduction operands.
/// It's enough for there to be one initial zero value, so we can eliminate the
/// extra ones that feed into `vector.reduction <add>`. These get created by the
@@ -1819,8 +1890,8 @@ void mlir::vector::populateShapeCastFoldingPatterns(RewritePatternSet &patterns,
void mlir::vector::populateDropUnitDimWithShapeCastPatterns(
RewritePatternSet &patterns, PatternBenefit benefit) {
- patterns.add<DropUnitDimFromElementwiseOps, ShapeCastOpFolder>(
- patterns.getContext(), benefit);
+ patterns.add<DropUnitDimFromElementwiseOps, DropUnitDimFromTransposeOp,
+ ShapeCastOpFolder>(patterns.getContext(), benefit);
}
void mlir::vector::populateBubbleVectorBitCastOpPatterns(
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index 788ae9ac044ed..1c8fd3a40acb8 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -460,6 +460,80 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
// CHECK-128B-NOT: memref.collapse_shape
+// -----
+
+func.func @fold_unit_dim_transpose(%arg0 : vector<1x4x1x2xf32>) -> vector<2x4x1x1xf32> {
+ %tr = vector.transpose %arg0, [3, 1, 2, 0]: vector<1x4x1x2xf32> to vector<2x4x1x1xf32>
+ return %tr : vector<2x4x1x1xf32>
+}
+// CHECK-LABEL: func.func @fold_unit_dim_transpose(
+// CHECK-SAME: %[[VAL_0:.*]]: vector<1x4x1x2xf32>) -> vector<2x4x1x1xf32> {
+// CHECK: %[[VAL_1:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x4x1x2xf32> to vector<4x2xf32>
+// CHECK: %[[VAL_2:.*]] = vector.transpose %[[VAL_1]], [1, 0] : vector<4x2xf32> to vector<2x4xf32>
+// CHECK: %[[VAL_3:.*]] = vector.shape_cast %[[VAL_2]] : vector<2x4xf32> to vector<2x4x1x1xf32>
+// CHECK: return %[[VAL_3]] : vector<2x4x1x1xf32>
+
+// -----
+
+func.func @fold_unit_dim_transpose_identity(%arg0 : vector<1x4x1x2xf32>) -> vector<1x4x2x1xf32> {
+ %tr = vector.transpose %arg0, [2, 1, 3, 0]: vector<1x4x1x2xf32> to vector<1x4x2x1xf32>
+ return %tr : vector<1x4x2x1xf32>
+}
+// CHECK-LABEL: func.func @fold_unit_dim_transpose_identity(
+// CHECK-SAME: %[[VAL_0:.*]]: vector<1x4x1x2xf32>) -> vector<1x4x2x1xf32> {
+// CHECK: %[[VAL_1:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x4x1x2xf32> to vector<4x2xf32>
+// CHECK: %[[VAL_2:.*]] = vector.shape_cast %[[VAL_1]] : vector<4x2xf32> to vector<1x4x2x1xf32>
+// CHECK: return %[[VAL_2]] : vector<1x4x2x1xf32>
+// -----
+
+func.func @fold_unit_dim_transpose_to_1_dim(%arg0 : vector<1x4xf32>) -> vector<4x1xf32> {
+ %tr = vector.transpose %arg0, [1, 0]: vector<1x4xf32> to vector<4x1xf32>
+ return %tr : vector<4x1xf32>
+}
+
+// CHECK-LABEL: func.func @fold_unit_dim_transpose_to_1_dim(
+// CHECK-SAME: %[[VAL_0:.*]]: vector<1x4xf32>) -> vector<4x1xf32> {
+// CHECK: %[[VAL_1:.*]] = vector.shape_cast %[[VAL_0]] : vector<1x4xf32> to vector<4xf32>
+// CHECK: %[[VAL_2:.*]] = vector.shape_cast %[[VAL_1]] : vector<4xf32> to vector<4x1xf32>
+// CHECK: return %[[VAL_2]] : vector<4x1xf32>
+
+// -----
+
+func.func @fold_unit_dim_transpose_all_one_dim(%arg0 : vector<1x1x1xf32>) -> vector<1x1x1xf32> {
+ %tr = vector.transpose %arg0, [1, 2, 0]: vector<1x1x1xf32> to vector<1x1x1xf32>
+ return %tr : vector<1x1x1xf32>
+}
+
+// CHECK-LABEL: func.func @fold_unit_dim_transpose_all_one_dim(
+// CHECK-SAME: %[[VAL_0:.*]]: vector<1x1x1xf32>) -> vector<1x1x1xf32> {
+// CHECK: return %[[VAL_0]] : vector<1x1x1xf32>
+
+// -----
+
+func.func @drop_unit_dim_full_example(%inputLHS : vector<[1]xf128>, %inputRHS : vector<[1]xf128>, %acc : vector<[1]x[1]xf128>) -> vector<[1]x[1]xf128> {
+ %lhsCast = vector.shape_cast %inputLHS : vector<[1]xf128> to vector<[1]x1xf128>
+ %lhsBcast = vector.broadcast %lhsCast : vector<[1]x1xf128> to vector<[1]x[1]x1xf128>
+ %lhsT = vector.transpose %lhsBcast, [1, 0, 2] : vector<[1]x[1]x1xf128> to vector<[1]x[1]x1xf128>
+ %rhsCast = vector.shape_cast %inputRHS : vector<[1]xf128> to vector<1x[1]xf128>
+ %rhsBcast = vector.broadcast %rhsCast : vector<1x[1]xf128> to vector<[1]x1x[1]xf128>
+ %rhs = vector.transpose %rhsBcast, [0, 2, 1] : vector<[1]x1x[1]xf128> to vector<[1]x[1]x1xf128>
+ %mul = arith.mulf %lhsT, %rhs : vector<[1]x[1]x1xf128>
+ %dropDim = vector.shape_cast %mul : vector<[1]x[1]x1xf128> to vector<[1]x[1]xf128>
+ %addAcc = arith.addf %acc, %dropDim : vector<[1]x[1]xf128>
+ return %addAcc : vector<[1]x[1]xf128>
+}
+
+// CHECK-LABEL: func.func @drop_unit_dim_full_example(
+// CHECK-SAME: %[[LHS:[a-zA-Z_]+[0-9]]]: vector<[1]xf128>,
+// CHECK-SAME: %[[RHS:[a-zA-Z_]+[0-9]]]: vector<[1]xf128>,
+// CHECK-SAME: %[[ACC:[a-zA-Z_]+[0-9]]]: vector<[1]x[1]xf128>) -> vector<[1]x[1]xf128> {
+// CHECK: %[[LHSBCAST:.*]] = vector.broadcast %[[LHS]] : vector<[1]xf128> to vector<[1]x[1]xf128>
+// CHECK: %[[TRA:.*]] = vector.transpose %[[LHSBCAST]], [1, 0] : vector<[1]x[1]xf128> to vector<[1]x[1]xf128>
+// CHECK: %[[RHSBCAST:.*]] = vector.broadcast %[[RHS]] : vector<[1]xf128> to vector<[1]x[1]xf128>
+// CHECK: %[[MUL:.*]] = arith.mulf %[[TRA]], %[[RHSBCAST]] : vector<[1]x[1]xf128>
+// CHECK: %[[RES:.*]] = arith.addf %[[ACC]], %[[MUL]] : vector<[1]x[1]xf128>
+// CHECK: return %[[RES]] : vector<[1]x[1]xf128>
+
// -----
func.func @regression_non_contiguous_dim_read(%subview : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>,
``````````
</details>
https://github.com/llvm/llvm-project/pull/93007
More information about the Mlir-commits
mailing list