[Mlir-commits] [mlir] [MLIR][Vector]Generalize DropUnitDimFromElementwiseOps (PR #92934)

Wed Jun 12 01:15:46 PDT 2024

https://github.com/nujaa updated https://github.com/llvm/llvm-project/pull/92934

>From a16a2364872b7b76e56745abab9697688c93d755 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Wed, 8 May 2024 23:02:44 +0800
Subject: [PATCH 01/11] [MLIR][Vector]Generalize DropUnitDimFromElementwiseOps

---
 .../Vector/Transforms/VectorTransforms.cpp    | 55 +++++++++++--------
 .../Vector/vector-transfer-flatten.mlir       | 18 ++++++
 2 files changed, 49 insertions(+), 24 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index f29eba90c3ceb..364b9ff3e5483 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1607,7 +1607,24 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
   }
 };
 
-/// For vectors with either leading or trailing unit dim, replaces:
+FailureOr<VectorType> dropNonScalableUnitDimType(VectorType VT) {
+  VectorType newVT = VT;
+  int removed = 0;
+  auto shape = VT.getShape();
+  for (unsigned i = 0; i < shape.size(); i++) {
+    if (shape[i] == 1 && !VT.getScalableDims()[i]) {
+      newVT = VectorType::Builder(newVT).dropDim(i - removed);
+      removed++;
+    }
+  }
+
+  if (removed == 0)
+    return failure();
+  return newVT;
+}
+
+
+/// For vectors with at least an unit dim, replaces:
 ///   elementwise(a, b)
 /// with:
 ///   sc_a = shape_cast(a)
@@ -1641,7 +1658,9 @@ struct DropUnitDimFromElementwiseOps final
   using OpTraitRewritePattern::OpTraitRewritePattern;
   LogicalResult matchAndRewrite(Operation *op,
                                 PatternRewriter &rewriter) const override {
-    if (op->getNumResults() != 1 || op->getNumRegions() != 0)
+    if (op->getNumResults() != 1)
+      return failure();
+    if (op->getNumRegions() != 0)
       return failure();
 
     auto resultVectorType = dyn_cast<VectorType>(op->getResult(0).getType());
@@ -1652,42 +1671,30 @@ struct DropUnitDimFromElementwiseOps final
     // guaranteed to have identical shapes (with some exceptions such as
     // `arith.select`) and it suffices to only check one of them.
     auto sourceVectorType = dyn_cast<VectorType>(op->getOperand(0).getType());
-    if (!sourceVectorType)
-      return failure();
-    if (sourceVectorType.getRank() < 2)
-      return failure();
-
-    bool hasTrailingDimUnitFixed =
-        ((sourceVectorType.getShape().back() == 1) &&
-         (!sourceVectorType.getScalableDims().back()));
-    bool hasLeadingDimUnitFixed =
-        ((sourceVectorType.getShape().front() == 1) &&
-         (!sourceVectorType.getScalableDims().front()));
-    if (!hasLeadingDimUnitFixed && !hasTrailingDimUnitFixed)
+    if (!sourceVectorType || sourceVectorType.getRank() < 2)
       return failure();
 
-    // Drop leading/trailing unit dim by applying vector.shape_cast to all
-    // operands
-    int64_t dim = hasLeadingDimUnitFixed ? 0 : sourceVectorType.getRank() - 1;
-
     SmallVector<Value> newOperands;
     auto loc = op->getLoc();
     for (auto operand : op->getOperands()) {
       auto opVectorType = cast<VectorType>(operand.getType());
-      VectorType newVType = VectorType::Builder(opVectorType).dropDim(dim);
-      auto opSC = rewriter.create<vector::ShapeCastOp>(loc, newVType, operand);
+      auto newVType = dropNonScalableUnitDimType(opVectorType);
+      if (failed(newVType)) {
+        return failure();
+      }
+      auto opSC =
+          rewriter.create<vector::ShapeCastOp>(loc, newVType.value(), operand);
       newOperands.push_back(opSC);
     }
 
     VectorType newResultVectorType =
-        VectorType::Builder(resultVectorType).dropDim(dim);
-    // Create an updated elementwise Op without leading/trailing unit dim
+        dropNonScalableUnitDimType(resultVectorType).value();
+    // Create an updated elementwise Op without unit dim
     Operation *elementwiseOp =
         rewriter.create(loc, op->getName().getIdentifier(), newOperands,
                         newResultVectorType, op->getAttrs());
 
-    // Restore the leading/trailing unit dim by applying vector.shape_cast
-    // to the result
+    // Restore the unit dim by applying vector.shape_cast to the result
     rewriter.replaceOpWithNewOp<ShapeCastOp>(op, resultVectorType,
                                              elementwiseOp->getResult(0));
 
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index d7365d25d21b4..37f3c98e09081 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -499,6 +499,24 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
 
 // -----
 
+func.func @fold_unit_center_dim_scalable(%arg0 : vector<8x1x[1]xf128>,
+                              %arg1 : vector<1x8x[1]xf128>) -> vector<8x[1]xf128> {
+   %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x[1]xf128> to vector<8x1x[1]xf128>
+   %add = arith.mulf %arg0, %sc_arg1 : vector<8x1x[1]xf128>
+   %res = vector.shape_cast %add : vector<8x1x[1]xf128> to vector<8x[1]xf128>
+   return %res : vector<8x[1]xf128>
+}
+
+// CHECK-LABEL: func.func @fold_unit_center_dim_scalable(
+// CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x[1]xf128>,
+// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x[1]xf128>) -> vector<8x[1]xf128> {
+// CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x[1]xf128> to vector<8x[1]xf128>
+// CHECK:         %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x[1]xf128> to vector<8x[1]xf128>
+// CHECK:         %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x[1]xf128>
+// CHECK:         return %[[VAL_4]] : vector<8x[1]xf128>
+
+// -----
+
 func.func @negative_out_of_bound_transfer_read(
     %arg : memref<?x4x3x2xi8, strided<[24, 6, 2, 1], offset: ?>>) -> vector<5x4x3x2xi8> {
   %c0 = arith.constant 0 : index

>From 119d206bcc1fe421449b1b5f565ab6859ad7fa8e Mon Sep 17 00:00:00 2001
From: Hugo Trachino <hugo.trachino at huawei.com>
Date: Tue, 21 May 2024 17:55:23 +0100
Subject: [PATCH 02/11] remove unrelated change.

---
 mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 364b9ff3e5483..e772d4bbea311 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1623,7 +1623,6 @@ FailureOr<VectorType> dropNonScalableUnitDimType(VectorType VT) {
   return newVT;
 }
 
-
 /// For vectors with at least an unit dim, replaces:
 ///   elementwise(a, b)
 /// with:
@@ -1658,9 +1657,7 @@ struct DropUnitDimFromElementwiseOps final
   using OpTraitRewritePattern::OpTraitRewritePattern;
   LogicalResult matchAndRewrite(Operation *op,
                                 PatternRewriter &rewriter) const override {
-    if (op->getNumResults() != 1)
-      return failure();
-    if (op->getNumRegions() != 0)
+    if (op->getNumResults() != 1 || op->getNumRegions() != 0)
       return failure();
 
     auto resultVectorType = dyn_cast<VectorType>(op->getResult(0).getType());

>From 1c32194ac179547da711a7742d61a0b75cb784b0 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Wed, 22 May 2024 16:27:31 +0800
Subject: [PATCH 03/11] Fixup hoist VT builder.

---
 mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index e772d4bbea311..cf08791498109 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1611,9 +1611,10 @@ FailureOr<VectorType> dropNonScalableUnitDimType(VectorType VT) {
   VectorType newVT = VT;
   int removed = 0;
   auto shape = VT.getShape();
+  auto builder = VectorType::Builder(newVT);
   for (unsigned i = 0; i < shape.size(); i++) {
     if (shape[i] == 1 && !VT.getScalableDims()[i]) {
-      newVT = VectorType::Builder(newVT).dropDim(i - removed);
+      newVT = builder.dropDim(i - removed);
       removed++;
     }
   }

>From d7c76f4f3cee2de69b7b140fa1265cabc8a368e7 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Wed, 22 May 2024 17:29:27 +0800
Subject: [PATCH 04/11] Hoist VT builder properly.

---
 mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index cf08791498109..bdff1ba3b3907 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1608,20 +1608,19 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
 };
 
 FailureOr<VectorType> dropNonScalableUnitDimType(VectorType VT) {
-  VectorType newVT = VT;
   int removed = 0;
   auto shape = VT.getShape();
-  auto builder = VectorType::Builder(newVT);
+  auto builder = VectorType::Builder(VT);
   for (unsigned i = 0; i < shape.size(); i++) {
     if (shape[i] == 1 && !VT.getScalableDims()[i]) {
-      newVT = builder.dropDim(i - removed);
+      builder.dropDim(i - removed);
       removed++;
     }
   }
 
   if (removed == 0)
     return failure();
-  return newVT;
+  return VectorType(builder);
 }
 
 /// For vectors with at least an unit dim, replaces:

>From be94fb0fb78381f9db7316cf1ec10064d51043ce Mon Sep 17 00:00:00 2001
From: Hugo Trachino <hugo.trachino at huawei.com>
Date: Thu, 23 May 2024 09:37:24 +0100
Subject: [PATCH 05/11] Refactor tests

Rename test functions. Add scalable specific test.
---
 .../Vector/vector-transfer-flatten.mlir       | 44 +++++++++++++------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index 37f3c98e09081..809a5feeb0132 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -499,21 +499,39 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
 
 // -----
 
-func.func @fold_unit_center_dim_scalable(%arg0 : vector<8x1x[1]xf128>,
-                              %arg1 : vector<1x8x[1]xf128>) -> vector<8x[1]xf128> {
-   %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x[1]xf128> to vector<8x1x[1]xf128>
-   %add = arith.mulf %arg0, %sc_arg1 : vector<8x1x[1]xf128>
-   %res = vector.shape_cast %add : vector<8x1x[1]xf128> to vector<8x[1]xf128>
-   return %res : vector<8x[1]xf128>
+func.func @fold_unit_inner_dim(%arg0 : vector<8x1x3xf128>,
+                              %arg1 : vector<1x8x3xf128>) -> vector<8x3xf128> {
+   %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x3xf128> to vector<8x1x3xf128>
+   %mul = arith.mulf %arg0, %sc_arg1 : vector<8x1x3xf128>
+   %res = vector.shape_cast %mul : vector<8x1x3xf128> to vector<8x3xf128>
+   return %res : vector<8x3xf128>
 }
 
-// CHECK-LABEL: func.func @fold_unit_center_dim_scalable(
-// CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x[1]xf128>,
-// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x[1]xf128>) -> vector<8x[1]xf128> {
-// CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x[1]xf128> to vector<8x[1]xf128>
-// CHECK:         %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x[1]xf128> to vector<8x[1]xf128>
-// CHECK:         %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x[1]xf128>
-// CHECK:         return %[[VAL_4]] : vector<8x[1]xf128>
+// CHECK-LABEL: func.func @fold_unit_inner_dim(
+// CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x3xf128>,
+// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x3xf128>) -> vector<8xxf128> {
+// CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x3xf128> to vector<8x3xf128>
+// CHECK:         %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x3xf128> to vector<8x3xf128>
+// CHECK:         %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x3xf128>
+// CHECK:         return %[[VAL_4]] : vector<8x3xf128>
+
+// -----
+
+func.func @fold_unit_inner_dim_scalable(%arg0 : vector<8x1x[1]x3xf128>,
+                              %arg1 : vector<1x8x[1]x3xf128>) -> vector<8x[1]x3xf128> {
+   %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x[1]x3xf128> to vector<8x1x[1]x3xf128>
+   %mul = arith.mulf %arg0, %sc_arg1 : vector<8x1x[1]x3xf128>
+   %res = vector.shape_cast %add : vector<8x1x[1]x3xf128> to vector<8x[1]x3xf128>
+   return %mul : vector<8x[1]x3xf128>
+}
+
+// CHECK-LABEL: func.func @fold_unit_inner_dim_scalable(
+// CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x[1]x3xf128>,
+// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x[1]x3xf128>) -> vector<8x[1]x3xf128> {
+// CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x[1]x3xf128> to vector<8x[1]x3xf128>
+// CHECK:         %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x[1]x3xf128> to vector<8x[1]x3xf128>
+// CHECK:         %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x[1]x3xf128>
+// CHECK:         return %[[VAL_4]] : vector<8x[1]x3xf128>
 
 // -----
 

>From fad986bbec860834551c8c62873f96efbefe2fce Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Tue, 28 May 2024 18:54:13 +0800
Subject: [PATCH 06/11] Fix naming and test

---
 .../Vector/Transforms/VectorTransforms.cpp    | 20 +++++++++----------
 .../Vector/vector-transfer-flatten.mlir       |  6 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index bdff1ba3b3907..40dda6ddb25cc 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1607,20 +1607,20 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
   }
 };
 
-FailureOr<VectorType> dropNonScalableUnitDimType(VectorType VT) {
-  int removed = 0;
-  auto shape = VT.getShape();
-  auto builder = VectorType::Builder(VT);
-  for (unsigned i = 0; i < shape.size(); i++) {
-    if (shape[i] == 1 && !VT.getScalableDims()[i]) {
-      builder.dropDim(i - removed);
-      removed++;
+FailureOr<VectorType> dropNonScalableUnitDimType(VectorType inVecTy) {
+  int numUnitDimsDropped = 0;
+  auto inVecShape = inVecTy.getShape();
+  auto newVecBuilder = VectorType::Builder(inVecTy);
+  for (unsigned i = 0; i < inVecShape.size(); i++) {
+    if (inVecShape[i] == 1 && !inVecTy.getScalableDims()[i]) {
+      newVecBuilder.dropDim(i - numUnitDimsDropped);
+      numUnitDimsDropped++;
     }
   }
 
-  if (removed == 0)
+  if (numUnitDimsDropped == 0)
     return failure();
-  return VectorType(builder);
+  return VectorType(newVecBuilder);
 }
 
 /// For vectors with at least an unit dim, replaces:
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index 809a5feeb0132..ef80ec6f47999 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -509,7 +509,7 @@ func.func @fold_unit_inner_dim(%arg0 : vector<8x1x3xf128>,
 
 // CHECK-LABEL: func.func @fold_unit_inner_dim(
 // CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x3xf128>,
-// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x3xf128>) -> vector<8xxf128> {
+// CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x3xf128>) -> vector<8x3xf128> {
 // CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x3xf128> to vector<8x3xf128>
 // CHECK:         %[[VAL_3:.*]] = vector.shape_cast %[[VAL_1]] : vector<1x8x3xf128> to vector<8x3xf128>
 // CHECK:         %[[VAL_4:.*]] = arith.mulf %[[VAL_2]], %[[VAL_3]] : vector<8x3xf128>
@@ -521,8 +521,8 @@ func.func @fold_unit_inner_dim_scalable(%arg0 : vector<8x1x[1]x3xf128>,
                               %arg1 : vector<1x8x[1]x3xf128>) -> vector<8x[1]x3xf128> {
    %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x[1]x3xf128> to vector<8x1x[1]x3xf128>
    %mul = arith.mulf %arg0, %sc_arg1 : vector<8x1x[1]x3xf128>
-   %res = vector.shape_cast %add : vector<8x1x[1]x3xf128> to vector<8x[1]x3xf128>
-   return %mul : vector<8x[1]x3xf128>
+   %res = vector.shape_cast %mul : vector<8x1x[1]x3xf128> to vector<8x[1]x3xf128>
+   return %res : vector<8x[1]x3xf128>
 }
 
 // CHECK-LABEL: func.func @fold_unit_inner_dim_scalable(

>From 32e81c5f6a483a323d5479673e4d81b569dbcf62 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Fri, 31 May 2024 19:04:23 +0800
Subject: [PATCH 07/11] helper function does not return failure.

---
 .../Vector/Transforms/VectorTransforms.cpp    | 29 +++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 40dda6ddb25cc..5e7356fd14861 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1607,20 +1607,20 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
   }
 };
 
-FailureOr<VectorType> dropNonScalableUnitDimType(VectorType inVecTy) {
-  int numUnitDimsDropped = 0;
-  auto inVecShape = inVecTy.getShape();
+VectorType dropNonScalableUnitDimType(VectorType inVecTy) {
   auto newVecBuilder = VectorType::Builder(inVecTy);
-  for (unsigned i = 0; i < inVecShape.size(); i++) {
-    if (inVecShape[i] == 1 && !inVecTy.getScalableDims()[i]) {
-      newVecBuilder.dropDim(i - numUnitDimsDropped);
-      numUnitDimsDropped++;
+  auto inVecShape = inVecTy.getShape();
+  SmallVector<int64_t> newShape;
+  SmallVector<bool> newScalableDims;
+  for (auto [dim, isScalable] :
+       llvm::zip(inVecShape, inVecTy.getScalableDims())) {
+    if (dim != 1 || isScalable) {
+      newShape.push_back(dim);
+      newScalableDims.push_back(isScalable);
     }
   }
 
-  if (numUnitDimsDropped == 0)
-    return failure();
-  return VectorType(newVecBuilder);
+  return VectorType::get(newShape, inVecTy.getElementType(), newScalableDims);
 }
 
 /// For vectors with at least an unit dim, replaces:
@@ -1676,16 +1676,15 @@ struct DropUnitDimFromElementwiseOps final
     for (auto operand : op->getOperands()) {
       auto opVectorType = cast<VectorType>(operand.getType());
       auto newVType = dropNonScalableUnitDimType(opVectorType);
-      if (failed(newVType)) {
-        return failure();
+      if (newVType == opVectorType) {
+        return rewriter.notifyMatchFailure(op, "No unit dimension to remove.");
       }
-      auto opSC =
-          rewriter.create<vector::ShapeCastOp>(loc, newVType.value(), operand);
+      auto opSC = rewriter.create<vector::ShapeCastOp>(loc, newVType, operand);
       newOperands.push_back(opSC);
     }
 
     VectorType newResultVectorType =
-        dropNonScalableUnitDimType(resultVectorType).value();
+        dropNonScalableUnitDimType(resultVectorType);
     // Create an updated elementwise Op without unit dim
     Operation *elementwiseOp =
         rewriter.create(loc, op->getName().getIdentifier(), newOperands,

>From 7b09906f56fde0893914f113cb6dc6b33998f2bc Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Mon, 3 Jun 2024 16:20:39 +0800
Subject: [PATCH 08/11] Fixup: simple exit and use zip_equal

---
 .../Vector/Transforms/VectorTransforms.cpp        | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 5e7356fd14861..43d5b32a38a8b 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1613,11 +1613,12 @@ VectorType dropNonScalableUnitDimType(VectorType inVecTy) {
   SmallVector<int64_t> newShape;
   SmallVector<bool> newScalableDims;
   for (auto [dim, isScalable] :
-       llvm::zip(inVecShape, inVecTy.getScalableDims())) {
-    if (dim != 1 || isScalable) {
-      newShape.push_back(dim);
-      newScalableDims.push_back(isScalable);
-    }
+       llvm::zip_equal(inVecShape, inVecTy.getScalableDims())) {
+    if (dim == 1 && !isScalable)
+      continue;
+
+    newShape.push_back(dim);
+    newScalableDims.push_back(isScalable);
   }
 
   return VectorType::get(newShape, inVecTy.getElementType(), newScalableDims);
@@ -1676,9 +1677,9 @@ struct DropUnitDimFromElementwiseOps final
     for (auto operand : op->getOperands()) {
       auto opVectorType = cast<VectorType>(operand.getType());
       auto newVType = dropNonScalableUnitDimType(opVectorType);
-      if (newVType == opVectorType) {
+      if (newVType == opVectorType)
         return rewriter.notifyMatchFailure(op, "No unit dimension to remove.");
-      }
+
       auto opSC = rewriter.create<vector::ShapeCastOp>(loc, newVType, operand);
       newOperands.push_back(opSC);
     }

>From fbbb43fb43ed44b9c23869f364ffcbfeda42509a Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Mon, 3 Jun 2024 20:11:06 +0800
Subject: [PATCH 09/11] Fixup : comment hook

---
 mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp | 9 ++++++---
 mlir/test/Dialect/Vector/vector-transfer-flatten.mlir   | 8 ++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 43d5b32a38a8b..353e28d578118 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1607,7 +1607,10 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
   }
 };
 
-VectorType dropNonScalableUnitDimType(VectorType inVecTy) {
+/// Drop unit dimension of the input VectorType. Scalable dimensions cannot be
+/// folded as we do not want to merge them through a shape_cast and create ill
+/// shaped scalable sizes.
+static VectorType dropNonScalableUnitDimFromType(VectorType inVecTy) {
   auto newVecBuilder = VectorType::Builder(inVecTy);
   auto inVecShape = inVecTy.getShape();
   SmallVector<int64_t> newShape;
@@ -1676,7 +1679,7 @@ struct DropUnitDimFromElementwiseOps final
     auto loc = op->getLoc();
     for (auto operand : op->getOperands()) {
       auto opVectorType = cast<VectorType>(operand.getType());
-      auto newVType = dropNonScalableUnitDimType(opVectorType);
+      auto newVType = dropNonScalableUnitDimFromType(opVectorType);
       if (newVType == opVectorType)
         return rewriter.notifyMatchFailure(op, "No unit dimension to remove.");
 
@@ -1685,7 +1688,7 @@ struct DropUnitDimFromElementwiseOps final
     }
 
     VectorType newResultVectorType =
-        dropNonScalableUnitDimType(resultVectorType);
+        dropNonScalableUnitDimFromType(resultVectorType);
     // Create an updated elementwise Op without unit dim
     Operation *elementwiseOp =
         rewriter.create(loc, op->getName().getIdentifier(), newOperands,
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index ef80ec6f47999..42bf7201daaa7 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -499,7 +499,7 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
 
 // -----
 
-func.func @fold_unit_inner_dim(%arg0 : vector<8x1x3xf128>,
+func.func @fold_inner_unit_dim(%arg0 : vector<8x1x3xf128>,
                               %arg1 : vector<1x8x3xf128>) -> vector<8x3xf128> {
    %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x3xf128> to vector<8x1x3xf128>
    %mul = arith.mulf %arg0, %sc_arg1 : vector<8x1x3xf128>
@@ -507,7 +507,7 @@ func.func @fold_unit_inner_dim(%arg0 : vector<8x1x3xf128>,
    return %res : vector<8x3xf128>
 }
 
-// CHECK-LABEL: func.func @fold_unit_inner_dim(
+// CHECK-LABEL: func.func @fold_inner_unit_dim(
 // CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x3xf128>,
 // CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x3xf128>) -> vector<8x3xf128> {
 // CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x3xf128> to vector<8x3xf128>
@@ -517,7 +517,7 @@ func.func @fold_unit_inner_dim(%arg0 : vector<8x1x3xf128>,
 
 // -----
 
-func.func @fold_unit_inner_dim_scalable(%arg0 : vector<8x1x[1]x3xf128>,
+func.func @fold_inner_unit_dim_scalable(%arg0 : vector<8x1x[1]x3xf128>,
                               %arg1 : vector<1x8x[1]x3xf128>) -> vector<8x[1]x3xf128> {
    %sc_arg1 = vector.shape_cast %arg1 : vector<1x8x[1]x3xf128> to vector<8x1x[1]x3xf128>
    %mul = arith.mulf %arg0, %sc_arg1 : vector<8x1x[1]x3xf128>
@@ -525,7 +525,7 @@ func.func @fold_unit_inner_dim_scalable(%arg0 : vector<8x1x[1]x3xf128>,
    return %res : vector<8x[1]x3xf128>
 }
 
-// CHECK-LABEL: func.func @fold_unit_inner_dim_scalable(
+// CHECK-LABEL: func.func @fold_inner_unit_dim_scalable(
 // CHECK-SAME:    %[[VAL_0:.*]]: vector<8x1x[1]x3xf128>,
 // CHECK-SAME:    %[[VAL_1:.*]]: vector<1x8x[1]x3xf128>) -> vector<8x[1]x3xf128> {
 // CHECK:         %[[VAL_2:.*]] = vector.shape_cast %[[VAL_0]] : vector<8x1x[1]x3xf128> to vector<8x[1]x3xf128>

>From 6b2204fa582a60b114d8be45b624bcf52da3f3ba Mon Sep 17 00:00:00 2001
From: Hugo Trachino <hugo.trachino at huawei.com>
Date: Tue, 4 Jun 2024 09:04:31 +0100
Subject: [PATCH 10/11] Comment update.

Added punctuation, removed inline code '`' in Doxygen
---
 mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 353e28d578118..6f77ddcef593d 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1639,20 +1639,16 @@ static VectorType dropNonScalableUnitDimFromType(VectorType inVecTy) {
 /// required to be rank > 1.
 ///
 /// Ex:
-/// ```
 ///  %mul = arith.mulf %B_row, %A_row : vector<1x[4]xf32>
 ///  %cast = vector.shape_cast %mul : vector<1x[4]xf32> to vector<[4]xf32>
-/// ```
 ///
 /// gets converted to:
 ///
-/// ```
 ///  %B_row_sc = vector.shape_cast %B_row : vector<1x[4]xf32> to vector<[4]xf32>
 ///  %A_row_sc = vector.shape_cast %A_row : vector<1x[4]xf32> to vector<[4]xf32>
 ///  %mul = arith.mulf %B_row_sc, %A_row_sc : vector<[4]xf32>
 ///  %cast_new = vector.shape_cast %mul : vector<[4]xf32> to vector<1x[4]xf32>
 ///  %cast = vector.shape_cast %cast_new : vector<1x[4]xf32> to vector<[4]xf32>
-/// ```
 ///
 /// Patterns for folding shape_casts should instantly eliminate `%cast_new` and
 /// `%cast`.
@@ -1689,12 +1685,12 @@ struct DropUnitDimFromElementwiseOps final
 
     VectorType newResultVectorType =
         dropNonScalableUnitDimFromType(resultVectorType);
-    // Create an updated elementwise Op without unit dim
+    // Create an updated elementwise Op without unit dim.
     Operation *elementwiseOp =
         rewriter.create(loc, op->getName().getIdentifier(), newOperands,
                         newResultVectorType, op->getAttrs());
 
-    // Restore the unit dim by applying vector.shape_cast to the result
+    // Restore the unit dim by applying vector.shape_cast to the result.
     rewriter.replaceOpWithNewOp<ShapeCastOp>(op, resultVectorType,
                                              elementwiseOp->getResult(0));
 

>From 03aa4b6fd1cb2a7a29314c62fe31a08d4b208fd9 Mon Sep 17 00:00:00 2001
From: Hugo <hugo.trachino at huawei.com>
Date: Wed, 12 Jun 2024 16:15:28 +0800
Subject: [PATCH 11/11] Update comment

---
 mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 6f77ddcef593d..cb9568d4202bc 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1607,9 +1607,10 @@ struct ChainedReduction final : OpRewritePattern<vector::ReductionOp> {
   }
 };
 
-/// Drop unit dimension of the input VectorType. Scalable dimensions cannot be
-/// folded as we do not want to merge them through a shape_cast and create ill
-/// shaped scalable sizes.
+// Scalable unit dimensions are not supported. Folding such dimensions would
+// require "shifting" the scalable flag onto some other fixed-width dim (e.g.
+// vector<[1]x4xf32> -> vector<[4]xf32>). This could be implemented in the
+// future.
 static VectorType dropNonScalableUnitDimFromType(VectorType inVecTy) {
   auto newVecBuilder = VectorType::Builder(inVecTy);
   auto inVecShape = inVecTy.getShape();