[Mlir-commits] [mlir] [mlir][linalg] Add a pattern to drop unit dim of `linalg.broadcast` (PR #106533)
Longsheng Mou
llvmlistbot at llvm.org
Thu Aug 29 04:45:05 PDT 2024
https://github.com/CoTinker created https://github.com/llvm/llvm-project/pull/106533
This PR adds a pattern to drop unit dims of `linalg.broadcast`. If a broadcasted output dimension has static size 1, it can be dropped.
e.g.
```
%0 = linalg.broadcast ins(%input : tensor<4xf32>)
outs(%init : tensor<1x4xf32>)
dimensions = [0]
```
is converted to:
```
%collapsed = tensor.collapse_shape %init [[0, 1]] :
tensor<1x4xf32> into tensor<4xf32>
%0 = linalg.broadcast ins(%input : tensor<4xf32>)
outs(%collapsed : tensor<4xf32>)
dimensions = []
%expanded = tensor.expand_shape %0 [[0, 1]] output_shape [1, 4] :
tensor<4xf32> into tensor<1x4xf32>
```
From b352719098ac146998a8b424ae5bdb70c2a2131a Mon Sep 17 00:00:00 2001
From: Longsheng Mou <moulongsheng at huawei.com>
Date: Thu, 29 Aug 2024 19:28:06 +0800
Subject: [PATCH] [mlir][linalg] Add a pattern to drop unit dim of
`linalg.broadcast`
This PR adds a pattern to drop unit dims of `linalg.broadcast`.
If a broadcasted output dimension has static size 1, it can be dropped.
e.g.
```
%0 = linalg.broadcast ins(%input : tensor<4xf32>)
outs(%init : tensor<1x4xf32>)
dimensions = [0]
```
is converted to:
```
%collapsed = tensor.collapse_shape %init [[0, 1]] :
tensor<1x4xf32> into tensor<4xf32>
%0 = linalg.broadcast ins(%input : tensor<4xf32>)
outs(%collapsed : tensor<4xf32>)
dimensions = []
%expanded = tensor.expand_shape %0 [[0, 1]] output_shape [1, 4] :
tensor<4xf32> into tensor<1x4xf32>
```
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 78 +++++++++++++++++++-
mlir/test/Dialect/Linalg/canonicalize.mlir | 85 +++++++++++++++++-----
2 files changed, 142 insertions(+), 21 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 76df3ecf2d2bd4..da45abc682f129 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -2134,9 +2134,85 @@ void BroadcastOp::getEffects(
getGenericEffectsImpl(effects, cast<LinalgOp>(getOperation()));
}
+/// If a broadcasted output dimension has static size 1, it can be dropped.
+/// e.g.
+/// ```
+/// %0 = linalg.broadcast ins(%input : tensor<4xf32>)
+/// outs(%init : tensor<1x4xf32>)
+/// dimensions = [0]
+/// ```
+/// converted to:
+/// ```
+/// %collapsed = tensor.collapse_shape %init [[0, 1]] :
+/// tensor<1x4xf32> into tensor<4xf32>
+/// %0 = linalg.broadcast ins(%input : tensor<4xf32>)
+/// outs(%collapsed : tensor<4xf32>)
+/// dimensions = []
+/// %expanded = tensor.expand_shape %0 [[0, 1]] output_shape [1, 4] :
+/// tensor<4xf32> into tensor<1x4xf32>
+/// ```
+struct DropUnitDimOfBroadcastOp : OpRewritePattern<linalg::BroadcastOp> {
+ using OpRewritePattern<linalg::BroadcastOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(linalg::BroadcastOp broadcastOp,
+ PatternRewriter &rewriter) const override {
+ if (!broadcastOp.hasPureTensorSemantics())
+ return failure();
+
+ auto init = broadcastOp.getInit();
+ auto initType = dyn_cast<RankedTensorType>(init.getType());
+ if (!initType)
+ return failure();
+ auto initShape = initType.getShape();
+ ArrayRef<int64_t> dimensions = broadcastOp.getDimensions();
+ if (!llvm::any_of(dimensions,
+ [&](auto dim) { return initShape[dim] == 1; }))
+ return failure();
+
+ SmallVector<int64_t> newDimensions;
+ int64_t dropDim = 0;
+ // Adjust dimensions of broadcast.
+ for (int64_t dim : dimensions) {
+ if (initShape[dim] != 1) {
+ newDimensions.push_back(dim - dropDim);
+ } else {
+ ++dropDim;
+ }
+ }
+ SmallVector<ReassociationIndices> reassociation;
+ // Build reassociation indices, grouping each dropped unit dim with the dim that follows it.
+ bool needCollapse = false;
+ for (int64_t dim = 0; dim < initType.getRank(); ++dim) {
+ if (needCollapse) {
+ reassociation.back().push_back(dim);
+ } else {
+ reassociation.push_back({dim});
+ }
+ // Update the needCollapse flag.
+ needCollapse =
+ (initShape[dim] == 1 && llvm::is_contained(dimensions, dim));
+ }
+
+ Location loc = broadcastOp.getLoc();
+ auto collapsedType =
+ tensor::CollapseShapeOp::inferCollapsedType(initType, reassociation);
+ auto collapsedInit = rewriter.create<tensor::CollapseShapeOp>(
+ loc, collapsedType, init, reassociation);
+ auto newBroadcast =
+ rewriter
+ .create<linalg::BroadcastOp>(loc, broadcastOp.getInput(),
+ collapsedInit, newDimensions)
+ .getResult()[0];
+ rewriter.replaceOpWithNewOp<tensor::ExpandShapeOp>(
+ broadcastOp, initType, newBroadcast, reassociation);
+ return success();
+ }
+};
+
void BroadcastOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
- results.add<EraseIdentityLinalgOp<BroadcastOp>>(context);
+ results.add<DropUnitDimOfBroadcastOp, EraseIdentityLinalgOp<BroadcastOp>>(
+ context);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 4bc2ed140da91a..19900203f0f5ff 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -1039,6 +1039,51 @@ func.func @broadcast_same_shape(%input: tensor<2x3xf32>, %init: tensor<2x3xf32>)
// -----
+// CHECK-LABEL: func.func @broadcast_unit_shape_front_fold(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x3xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> {
+// CHECK: %[[VAL_2:.*]] = tensor.expand_shape %[[VAL_0]] {{\[\[}}0, 1], [2]] output_shape [1, 2, 3] : tensor<2x3xf32> into tensor<1x2x3xf32>
+// CHECK: return %[[VAL_2]] : tensor<1x2x3xf32>
+// CHECK: }
+func.func @broadcast_unit_shape_front_fold(%input: tensor<2x3xf32>, %init: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> {
+ %0 = linalg.broadcast ins(%input: tensor<2x3xf32>) outs(%init: tensor<1x2x3xf32>) dimensions = [0]
+ return %0 : tensor<1x2x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func.func @broadcast_unit_shape_middle_fold(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<2x1x3xf32>) -> tensor<2x1x3xf32> {
+// CHECK: %[[VAL_2:.*]] = tensor.collapse_shape %[[VAL_1]] {{\[\[}}0], [1, 2]] : tensor<2x1x3xf32> into tensor<2x3xf32>
+// CHECK: %[[VAL_3:.*]] = linalg.broadcast ins(%[[VAL_0]] : tensor<2xf32>) outs(%[[VAL_2]] : tensor<2x3xf32>) dimensions = [1]
+// CHECK: %[[VAL_4:.*]] = tensor.expand_shape %[[VAL_3]] {{\[\[}}0], [1, 2]] output_shape [2, 1, 3] : tensor<2x3xf32> into tensor<2x1x3xf32>
+// CHECK: return %[[VAL_4]] : tensor<2x1x3xf32>
+// CHECK: }
+func.func @broadcast_unit_shape_middle_fold(%input: tensor<2xf32>, %init: tensor<2x1x3xf32>) -> tensor<2x1x3xf32> {
+ %0 = linalg.broadcast ins(%input: tensor<2xf32>) outs(%init: tensor<2x1x3xf32>) dimensions = [1, 2]
+ return %0 : tensor<2x1x3xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func.func @broadcast_unit_shape_dynamic_fold(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x?xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<2x1x3x?xf32>) -> tensor<2x1x3x?xf32> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_3:.*]] = tensor.collapse_shape %[[VAL_1]] {{\[\[}}0], [1, 2], [3]] : tensor<2x1x3x?xf32> into tensor<2x3x?xf32>
+// CHECK: %[[VAL_4:.*]] = linalg.broadcast ins(%[[VAL_0]] : tensor<2x?xf32>) outs(%[[VAL_3]] : tensor<2x3x?xf32>) dimensions = [1]
+// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_1]], %[[VAL_2]] : tensor<2x1x3x?xf32>
+// CHECK: %[[VAL_6:.*]] = tensor.expand_shape %[[VAL_4]] {{\[\[}}0], [1, 2], [3]] output_shape [2, 1, 3, %[[VAL_5]]] : tensor<2x3x?xf32> into tensor<2x1x3x?xf32>
+// CHECK: return %[[VAL_6]] : tensor<2x1x3x?xf32>
+// CHECK: }
+func.func @broadcast_unit_shape_dynamic_fold(%input: tensor<2x?xf32>, %init: tensor<2x1x3x?xf32>) -> tensor<2x1x3x?xf32> {
+ %0 = linalg.broadcast ins(%input: tensor<2x?xf32>) outs(%init: tensor<2x1x3x?xf32>) dimensions = [1, 2]
+ return %0 : tensor<2x1x3x?xf32>
+}
+
+// -----
+
func.func @transpose_1d(%input: tensor<16xf32>,
%init: tensor<16xf32>) -> tensor<16xf32> {
%transpose = linalg.transpose
@@ -1119,53 +1164,53 @@ func.func @transpose_transpose_fold(%input: tensor<5x4x3xf32>,
// -----
func.func @broadcast_transpose_fold(%input: tensor<2x4x5xf32>,
- %init1: tensor<1x2x3x4x5x6xf32>,
- %init2: tensor<1x6x2x3x5x4xf32>) -> tensor<1x6x2x3x5x4xf32> {
+ %init1: tensor<7x2x3x4x5x6xf32>,
+ %init2: tensor<7x6x2x3x5x4xf32>) -> tensor<7x6x2x3x5x4xf32> {
// CHECK-LABEL: @broadcast_transpose_fold
// CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<2x4x5xf32>
- // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<1x2x3x4x5x6xf32>
- // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<1x6x2x3x5x4xf32>
+ // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<7x2x3x4x5x6xf32>
+ // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<7x6x2x3x5x4xf32>
// CHECK: %[[TMP_INIT:.+]] = tensor.empty() : tensor<2x5x4xf32>
// CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<2x4x5xf32>) outs(%[[TMP_INIT]] : tensor<2x5x4xf32>) permutation = [0, 2, 1]
- // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<2x5x4xf32>) outs(%[[INIT2]] : tensor<1x6x2x3x5x4xf32>) dimensions = [0, 3, 1]
- // CHECK: return %[[BROADCAST]] : tensor<1x6x2x3x5x4xf32>
+ // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<2x5x4xf32>) outs(%[[INIT2]] : tensor<7x6x2x3x5x4xf32>) dimensions = [0, 3, 1]
+ // CHECK: return %[[BROADCAST]] : tensor<7x6x2x3x5x4xf32>
%broadcast = linalg.broadcast
ins(%input : tensor<2x4x5xf32>)
- outs(%init1 : tensor<1x2x3x4x5x6xf32>)
+ outs(%init1 : tensor<7x2x3x4x5x6xf32>)
dimensions = [0, 2, 5]
%transpose = linalg.transpose
- ins(%broadcast : tensor<1x2x3x4x5x6xf32>)
- outs(%init2 : tensor<1x6x2x3x5x4xf32>)
+ ins(%broadcast : tensor<7x2x3x4x5x6xf32>)
+ outs(%init2 : tensor<7x6x2x3x5x4xf32>)
permutation = [0, 5, 1, 2, 4, 3]
- func.return %transpose : tensor<1x6x2x3x5x4xf32>
+ func.return %transpose : tensor<7x6x2x3x5x4xf32>
}
// -----
func.func @broadcast_transpose_fold_dynamic(%input: tensor<?x?x5xf32>,
- %init1: tensor<1x?x3x?x5x6xf32>,
- %init2: tensor<1x3x?x6x5x?xf32>) -> tensor<1x3x?x6x5x?xf32> {
+ %init1: tensor<2x?x3x?x5x6xf32>,
+ %init2: tensor<2x3x?x6x5x?xf32>) -> tensor<2x3x?x6x5x?xf32> {
// CHECK-LABEL: @broadcast_transpose_fold_dynamic
// CHECK-SAME: %[[INPUT:[a-zA-Z0-9]+]]: tensor<?x?x5xf32>
- // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<1x?x3x?x5x6xf32>
- // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<1x3x?x6x5x?xf32>
+ // CHECK-SAME: %[[INIT1:[a-zA-Z0-9]+]]: tensor<2x?x3x?x5x6xf32>
+ // CHECK-SAME: %[[INIT2:[a-zA-Z0-9]+]]: tensor<2x3x?x6x5x?xf32>
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK: %[[DIM0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x5xf32>
// CHECK: %[[DIM1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x5xf32>
// CHECK: %[[TMP_INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) : tensor<?x5x?xf32>
// CHECK: %[[TRANSPOSE:.+]] = linalg.transpose ins(%[[INPUT]] : tensor<?x?x5xf32>) outs(%[[TMP_INIT]] : tensor<?x5x?xf32>) permutation = [1, 2, 0]
- // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<?x5x?xf32>) outs(%[[INIT2]] : tensor<1x3x?x6x5x?xf32>) dimensions = [0, 1, 3]
- // CHECK: return %[[BROADCAST]] : tensor<1x3x?x6x5x?xf32>
+ // CHECK: %[[BROADCAST:.+]] = linalg.broadcast ins(%[[TRANSPOSE]] : tensor<?x5x?xf32>) outs(%[[INIT2]] : tensor<2x3x?x6x5x?xf32>) dimensions = [0, 1, 3]
+ // CHECK: return %[[BROADCAST]] : tensor<2x3x?x6x5x?xf32>
%broadcast = linalg.broadcast
ins(%input : tensor<?x?x5xf32>)
- outs(%init1 : tensor<1x?x3x?x5x6xf32>)
+ outs(%init1 : tensor<2x?x3x?x5x6xf32>)
dimensions = [0, 2, 5]
%transpose = linalg.transpose
- ins(%broadcast : tensor<1x?x3x?x5x6xf32>)
- outs(%init2 : tensor<1x3x?x6x5x?xf32>)
+ ins(%broadcast : tensor<2x?x3x?x5x6xf32>)
+ outs(%init2 : tensor<2x3x?x6x5x?xf32>)
permutation = [0, 2, 3, 5, 4, 1]
- func.return %transpose : tensor<1x3x?x6x5x?xf32>
+ func.return %transpose : tensor<2x3x?x6x5x?xf32>
}
// -----