[Mlir-commits] [mlir] [mlir][tensor] Improve `FoldTensorCastProducerOp` (dynamic shapes) (PR #114559)

Fri Nov 1 09:09:22 PDT 2024

https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/114559

Currently, `FoldTensorCastProducerOp` incorrectly folds the following:
```mlir
    %pack = tensor.pack %src
      padding_value(%pad : i32)
      inner_dims_pos = [0, 1]
      inner_tiles = [%c8, 1]
      into %cast : tensor<7x?xi32> -> tensor<1x1x?x1xi32>
    %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32>
```
as (note the static trailing dim in the result and dynamic tile
dimension that corresponds to that):
```mlir
    %res = tensor.pack %src
      padding_value(%pad : i32)
      inner_dims_pos = [0, 1]
      inner_tiles = [%c8, 1]
      into %cast : tensor<7x?xi32> -> tensor<1x1x8x1xi32>
```

This triggers an Op verification failure and is due to the fact that the
folder does not update the inner tile sizes in the pack Op. This PR
addresses that.

Note, supporting other Ops with size-like attributes is left as a TODO;


>From d40f7052348001349164d13a50c2beff164373e8 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Fri, 1 Nov 2024 15:59:47 +0000
Subject: [PATCH] [mlir][tensor] Improve `FoldTensorCastProducerOp` (dynamic
 shapes)

Currently, `FoldTensorCastProducerOp` incorrectly folds the following:
```mlir
    %pack = tensor.pack %src
      padding_value(%pad : i32)
      inner_dims_pos = [0, 1]
      inner_tiles = [%c8, 1]
      into %cast : tensor<7x?xi32> -> tensor<1x1x?x1xi32>
    %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32>
```
as (note the static trailing dim in the result and dynamic tile
dimension that corresponds to that):
```mlir
    %res = tensor.pack %src
      padding_value(%pad : i32)
      inner_dims_pos = [0, 1]
      inner_tiles = [%c8, 1]
      into %cast : tensor<7x?xi32> -> tensor<1x1x8x1xi32>
```

This triggers an Op verification failure and is due to the fact that the
folder does not update the inner tile sizes in the pack Op. This PR
addresses that.

Note, supporting other Ops with size-like attributes is left as a TODO;
---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp   | 46 +++++++++++++++++++++-
 mlir/test/Dialect/Tensor/canonicalize.mlir | 23 ++++++++++-
 2 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index c2d6bc610cd92a..406b557b0f0e39 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -4756,8 +4756,50 @@ struct FoldTensorCastProducerOp
         newResultTypes[dpsInitIdx++] = newOperands.back().getType();
     }
 
-    // Clone op.
-    Operation *newOp = clone(rewriter, op, newResultTypes, newOperands);
+    // For ops that have sizes-like attribute, update these accordingly.
+    // For now, only `tensor.pack` is supported.
+    // TODO: Generalize to make it work with other ops as well (e.g.
+    // `tensor.unpack`)
+    SmallVector<OpFoldResult> newMixedTileSizes;
+    if (auto pack = dyn_cast_or_null<tensor::PackOp>(*op)) {
+      for (auto it : llvm::zip(cast<ShapedType>(newResultTypes[0])
+                                   .getShape()
+                                   .take_back(pack.getMixedTiles().size()),
+                               pack.getMixedTiles())) {
+
+        int64_t shape = std::get<0>(it);
+        if (shape == ShapedType::kDynamic) {
+          newMixedTileSizes.push_back(std::get<1>(it));
+          continue;
+        }
+
+        if (Attribute attr =
+                llvm::dyn_cast_if_present<Attribute>(std::get<1>(it))) {
+          // Already a constant
+          newMixedTileSizes.push_back(std::get<1>(it));
+        } else {
+          auto tileSize = getConstantIntValue(std::get<1>(it));
+          assert(tileSize == shape && "tile size and dim size don't match!");
+          newMixedTileSizes.push_back(
+              (rewriter.getIntegerAttr(rewriter.getIndexType(), shape)));
+        }
+      }
+    }
+
+    // Clone op. For ops that have sizes-like attribute, make sure to udpate
+    // those as well. For now, only `tensor.pack` is supported.
+    // TODO: Generalize to make it work with other ops as well (e.g.
+    // `tensor.unpack`)
+    // Operation *newOp;
+    Operation *newOp;
+    if (auto pack = dyn_cast_or_null<tensor::PackOp>(*op)) {
+      newOp = rewriter.create<PackOp>(
+          pack.getLoc(), newOperands[0], newOperands[1], pack.getInnerDimsPos(),
+          newMixedTileSizes, pack.getPaddingValue(), pack.getOuterDimsPerm());
+    } else {
+      newOp = clone(rewriter, op, newResultTypes, newOperands);
+    }
+
     SmallVector<Value, 4> replacements;
     replacements.reserve(newOp->getNumResults());
     for (auto [oldResult, newResult] :
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index 693079c3aa2fac..ebcc69250ad56d 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -2718,18 +2718,37 @@ func.func @dim_out_of_bounds() -> vector<7xi32> {
 
 // -----
 
-// CHECK-LABEL:   func.func @test_destination_multiple_result(
+// CHECK-LABEL:   func.func @fold_cast_multiple_results(
 // CHECK-SAME:         %[[ARG1:.*]]: tensor<2x2xf32>,
 // CHECK-SAME:         %[[ARG2:.*]]: tensor<2x2xf32>) -> index {
 // CHECK:           %[[RES:.*]]:2 = test.destination_style_op ins(%[[ARG1]] : tensor<2x2xf32>)
 // CHECK-SAME:      outs(%[[ARG2]] : tensor<2x2xf32>) -> tensor<2x2xf32>, index
 // CHECK:           return %[[RES]]#1 : index
-func.func @test_destination_multiple_result(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> index {
+func.func @fold_cast_multiple_results(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> index {
   %cast = tensor.cast %arg0 : tensor<2x2xf32> to tensor<?x2xf32>
   %cast_0 = tensor.cast %arg1 : tensor<2x2xf32> to tensor<?x2xf32>
   %0:2 = test.destination_style_op ins(%cast : tensor<?x2xf32>) outs(%cast_0 : tensor<?x2xf32>) -> tensor<?x2xf32>, index
   return %0#1 : index
 }
+// -----
+
+// CHECK-LABEL:   func.func @fold_cast_pack_dynamic_tile_size
+// CHECK-SAME:      %[[DEST:.*]]: tensor<1x1x8x1xi32>,
+// CHECK-SAME:      %[[SRC:.*]]: tensor<7x?xi32>,
+// CHECK-SAME:      %[[PAD:.*]]: i32) -> tensor<1x1x8x1xi32> {
+// CHECK:           %[[PACK:.*]] = tensor.pack %[[SRC]] padding_value(%[[PAD]] : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %[[DEST]] : tensor<7x?xi32> -> tensor<1x1x8x1xi32>
+// CHECK:           return %[[PACK]] : tensor<1x1x8x1xi32>
+func.func @fold_cast_pack_dynamic_tile_size(
+  %dest: tensor<1x1x8x1xi32>,
+  %src: tensor<7x?xi32>,
+  %pad: i32) -> tensor<1x1x8x1xi32> {
+
+    %cast = tensor.cast %dest : tensor<1x1x8x1xi32> to tensor<1x1x?x1xi32>
+    %c8 = arith.constant 8 : index
+    %pack = tensor.pack %src padding_value(%pad : i32) inner_dims_pos = [0, 1] inner_tiles = [%c8, 1] into %cast : tensor<7x?xi32> -> tensor<1x1x?x1xi32>
+    %res = tensor.cast %pack : tensor<1x1x?x1xi32> to tensor<1x1x8x1xi32>
+    return %res : tensor<1x1x8x1xi32>
+}
 
 // -----