[Mlir-commits] [mlir] [MLIR][Vector]Generalize DropUnitDimFromElementwiseOps (PR #92934)

Tue May 21 09:53:07 PDT 2024

https://github.com/nujaa created https://github.com/llvm/llvm-project/pull/92934

Generalizes `DropUnitDimFromElementwiseOps` to support inner unit dimensions.

discussed [here](https://discourse.llvm.org/t/mlir-for-arm-sme-vectorizing-matmul-like-ops-as-part-of-a-broader-program/78603).

>From 19695b46aba3a76f22962d4afab894443a63c672 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Wed, 8 May 2024 23:02:44 +0800
Subject: [PATCH] [MLIR][Vector]Generalize DropUnitDimFromElementwiseOps

---
 .../Vector/Transforms/VectorTransforms.cpp    | 55 +++++++++++--------
 .../Vector/vector-transfer-flatten.mlir       | 20 +++++++
 2 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index f29eba90c3ceb..364b9ff3e5483 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1607,7 +1607,24 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
   }
 };
 
-/// For vectors with either leading or trailing unit dim, replaces:
+FailureOr<VectorType> dropNonScalableUnitDimType(VectorType VT) {
+  VectorType newVT = VT;
+  int removed = 0;
+  auto shape = VT.getShape();
+  for (unsigned i = 0; i < shape.size(); i++) {
+    if (shape[i] == 1 && !VT.getScalableDims()[i]) {
+      newVT = VectorType::Builder(newVT).dropDim(i - removed);
+      removed++;
+    }
+  }
+
+  if (removed == 0)
+    return failure();
+  return newVT;
+}
+
+
+/// For vectors with at least an unit dim, replaces:
 ///   elementwise(a, b)
 /// with:
 ///   sc_a = shape_cast(a)
@@ -1641,7 +1658,9 @@ struct DropUnitDimFromElementwiseOps final
   using OpTraitRewritePattern::OpTraitRewritePattern;
   LogicalResult matchAndRewrite(Operation *op,
                                 PatternRewriter &rewriter) const override {
-    if (op->getNumResults() != 1 || op->getNumRegions() != 0)
+    if (op->getNumResults() != 1)
+      return failure();
+    if (op->getNumRegions() != 0)
       return failure();
 
     auto resultVectorType = dyn_cast<VectorType>(op->getResult(0).getType());
@@ -1652,42 +1671,30 @@ struct DropUnitDimFromElementwiseOps final
     // guaranteed to have identical shapes (with some exceptions such as
     // `arith.select`) and it suffices to only check one of them.
     auto sourceVectorType = dyn_cast<VectorType>(op->getOperand(0).getType());
-    if (!sourceVectorType)
-      return failure();
-    if (sourceVectorType.getRank() < 2)
-      return failure();
-
-    bool hasTrailingDimUnitFixed =
-        ((sourceVectorType.getShape().back() == 1) &&
-         (!sourceVectorType.getScalableDims().back()));
-    bool hasLeadingDimUnitFixed =
-        ((sourceVectorType.getShape().front() == 1) &&
-         (!sourceVectorType.getScalableDims().front()));
-    if (!hasLeadingDimUnitFixed && !hasTrailingDimUnitFixed)
+    if (!sourceVectorType || sourceVectorType.getRank() < 2)
       return failure();
 
-    // Drop leading/trailing unit dim by applying vector.shape_cast to all
-    // operands
-    int64_t dim = hasLeadingDimUnitFixed ? 0 : sourceVectorType.getRank() - 1;
-
     SmallVector<Value> newOperands;
     auto loc = op->getLoc();
     for (auto operand : op->getOperands()) {
       auto opVectorType = cast<VectorType>(operand.getType());
-      VectorType newVType = VectorType::Builder(opVectorType).dropDim(dim);
-      auto opSC = rewriter.create<vector::ShapeCastOp>(loc, newVType, operand);
+      auto newVType = dropNonScalableUnitDimType(opVectorType);
+      if (failed(newVType)) {
+        return failure();
+      }
+      auto opSC =
+          rewriter.create<vector::ShapeCastOp>(loc, newVType.value(), operand);
       newOperands.push_back(opSC);
     }
 
     VectorType newResultVectorType =
-        VectorType::Builder(resultVectorType).dropDim(dim);
-    // Create an updated elementwise Op without leading/trailing unit dim
+        dropNonScalableUnitDimType(resultVectorType).value();
+    // Create an updated elementwise Op without unit dim
     Operation *elementwiseOp =
         rewriter.create(loc, op->getName().getIdentifier(), newOperands,
                         newResultVectorType, op->getAttrs());
 
-    // Restore the leading/trailing unit dim by applying vector.shape_cast
-    // to the result
+    // Restore the unit dim by applying vector.shape_cast to the result
     rewriter.replaceOpWithNewOp<ShapeCastOp>(op, resultVectorType,
                                              elementwiseOp->getResult(0));
 
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index 788ae9ac044ed..03c19742355bf 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -459,6 +459,26 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
 // CHECK-128B-LABEL: func @fold_unit_dims_entirely(
 //   CHECK-128B-NOT:   memref.collapse_shape
 
+// -----
+
+func.func @fold_unit_center_dim_scalable(%arg0 : vector<8x1x[1]xf128>,
+                              %arg1 : vector<1x8x[1]xf128>) -> vector<8x[1]xf128> {
+   %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x[1]xf128> to vector<8x1x[1]xf128>
+   %add = arith.mulf %arg0, %sc_arg1 : vector<8x1x[1]xf128>
+   %res = vector.shape_cast %add : vector<8x1x[1]xf128> to vector<8x[1]xf128>
+   return %res : vector<8x[1]xf128>
+}
+
+// CHECK-LABEL: func.func @fold_unit_center_dim_scalable(
+// CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x[1]xf128>,
+// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x[1]xf128>) -> vector<8x[1]xf128> {
+// CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x[1]xf128> to vector<8x[1]xf128>
+// CHECK:         %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x[1]xf128> to vector<8x[1]xf128>
+// CHECK:         %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x[1]xf128>
+// CHECK:         return %[[VAL_4]] : vector<8x[1]xf128>
+
+
+
 
 // -----