[Mlir-commits] [mlir] [mlir][tensor] Relax the logic to generalise tensor.pack (PR #110807)
Andrzej Warzyński
llvmlistbot at llvm.org
Wed Oct 2 02:19:23 PDT 2024
https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/110807
Make sure that the logic to generalize tensor.pack (into e.g. tensor.pad
and tensor.transpose) does indeed allow multiple dynamic tile sizes. This
was effectively already implemented in #109815 — in this PR I merely
remove one `if` condition and add a test.
I also took the liberty of renaming a few test functions - just to
better highlight the differences between the old and the new tests.
Follow-on for #109815.
From 2f66765583b991980181b194b19eabff9dec2ce7 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Wed, 2 Oct 2024 10:07:36 +0100
Subject: [PATCH] [mlir][tensor] Relax the logic to generalise tensor.pack
Make sure that the logic to generalize tensor.pack (into e.g. tensor.pad
and tensor.transpose) does indeed allow multiple dynamic tile sizes. This
was effectively already implemented in #109815 — in this PR I merely
remove one `if` condition and add a test.
I also took the liberty of renaming a few test functions - just to
better highlight the differences between the old and the new tests.
Follow-on for #109815.
---
.../Dialect/Linalg/Transforms/Transforms.cpp | 6 ---
.../Linalg/generalize-tensor-pack.mlir | 44 +++++++++++++++----
2 files changed, 35 insertions(+), 15 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 729b2653cd83c0..0fe096863d7b01 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -1145,12 +1145,6 @@ getPackUnpackRankReducedPerm(ArrayRef<int64_t> shape,
LogicalResult GeneralizeOuterUnitDimsPackOpPattern::matchAndRewrite(
tensor::PackOp packOp, PatternRewriter &rewriter) const {
- if (llvm::count_if(packOp.getMixedTiles(),
- [](OpFoldResult tile) { return tile.is<Value>(); }) > 1) {
- return rewriter.notifyMatchFailure(
- packOp, "at most one dynamic tile size is supported");
- }
-
// TODO: support the case that outer dimensions are not all 1s. A
// tensor.expand_shape will be generated in this case.
if (llvm::any_of(packOp.getTiledOuterDims(),
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
index bb23a869a9cc5b..7f6b5e279f6857 100644
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
@@ -19,13 +19,14 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x
// -----
-func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> {
+func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> {
%0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32>
return %0 : tensor<1x1x8x2xf32>
}
// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 - 5)>
+// CHECK: #[[$ATTR_1:.+]] = affine_map<()[s0] -> (s0 - 1)>
-// CHECK-LABEL: func.func @simple_pad_and_pack
+// CHECK-LABEL: func.func @simple_pad_and_pack_static_tiles
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
@@ -36,18 +37,18 @@ func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2x
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1]
// CHECK: return %[[INSERT]]
-/// Same as example above, but with dynamic tile size.
+/// Same as example above, but with 1 dynamic tile size.
-func.func @simple_pad_and_pack_dynamic(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32, %high: index) -> tensor<1x1x?x2xf32> {
+func.func @simple_pad_and_pack_dynamic_tile(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32, %high: index) -> tensor<1x1x?x2xf32> {
%0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%high, 2] into %output : tensor<5x1xf32> -> tensor<1x1x?x2xf32>
return %0 : tensor<1x1x?x2xf32>
}
-// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic(
+// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tile(
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
-// CHECK-SAME: %[[HIGH_VAL:.*]]: index) -> tensor<1x1x?x2xf32> {
+// CHECK-SAME: %[[HIGH_VAL:[a-zA-Z0-9]+]]: index) -> tensor<1x1x?x2xf32> {
// CHECK: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[PAD_HIGH:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[HIGH_VAL]]]
// CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0] high{{\[}}%[[PAD_HIGH]], 1] {
@@ -58,13 +59,13 @@ func.func @simple_pad_and_pack_dynamic(%input: tensor<5x1xf32>, %output: tensor<
// CHECK: %[[RES:.*]] = tensor.insert_slice %[[SLICE]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[DIM]], 2] [1, 1, 1, 1] : tensor<?x2xf32> into tensor<1x1x?x2xf32>
// CHECK: return %[[RES]] : tensor<1x1x?x2xf32>
-/// Same as example above, but with scalable tile size.
+/// Same as example above, but with 1 scalable tile size.
/// NOTE: For this example to make sense in practice, the "?" in the output shape
/// should effectively be 8 * vector.vscale (and that's what tensor.dim
/// below should return).
-func.func @simple_pad_and_pack_scalable(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32) -> tensor<1x1x?x2xf32> {
+func.func @simple_pad_and_pack_scalable_tile(%input: tensor<5x1xf32>, %output: tensor<1x1x?x2xf32>, %pad: f32) -> tensor<1x1x?x2xf32> {
%c8 = arith.constant 8 : index
%vscale = vector.vscale
%c8_vscale = arith.muli %vscale, %c8 : index
@@ -72,7 +73,7 @@ func.func @simple_pad_and_pack_scalable(%input: tensor<5x1xf32>, %output: tensor
return %0 : tensor<1x1x?x2xf32>
}
-// CHECK-LABEL: func.func @simple_pad_and_pack_scalable(
+// CHECK-LABEL: func.func @simple_pad_and_pack_scalable_tile(
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]: tensor<5x1xf32>,
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]: tensor<1x1x?x2xf32>,
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]: f32) -> tensor<1x1x?x2xf32> {
@@ -89,6 +90,31 @@ func.func @simple_pad_and_pack_scalable(%input: tensor<5x1xf32>, %output: tensor
// CHECK: %[[RES:.+]] = tensor.insert_slice %[[SLICE]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[DIM]], 2] [1, 1, 1, 1] : tensor<?x2xf32> into tensor<1x1x?x2xf32>
// CHECK: return %[[RES]] : tensor<1x1x?x2xf32>
+/// Same as example above, but with both tile sizes dynamic.
+
+func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x?x?xf32>, %pad: f32, %high_1: index, %high_2: index) -> tensor<1x1x?x?xf32> {
+ %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [%high_1, %high_2] into %output : tensor<5x1xf32> -> tensor<1x1x?x?xf32>
+ return %0 : tensor<1x1x?x?xf32>
+}
+// CHECK-LABEL: func.func @simple_pad_and_pack_dynamic_tiles(
+// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]: tensor<5x1xf32>,
+// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]: tensor<1x1x?x?xf32>,
+// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]: f32,
+// CHECK-SAME: %[[HIGH_VAL_1:[a-zA-Z0-9]+]]: index,
+// CHECK-SAME: %[[HIGH_VAL_2:[a-zA-Z0-9]+]]: index) -> tensor<1x1x?x?xf32> {
+// CHECK: %[[C3:.*]] = arith.constant 3 : index
+// CHECK: %[[C2:.*]] = arith.constant 2 : index
+// CHECK: %[[PAD_HIGH_1:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[HIGH_VAL_1]]]
+// CHECK: %[[PAD_HIGH_2:.*]] = affine.apply #[[$ATTR_1]](){{\[}}%[[HIGH_VAL_2]]]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SRC]] low[0, 0] high{{\[}}%[[PAD_HIGH_1]], %[[PAD_HIGH_2]]] {
+// CHECK: tensor.yield %[[PAD_VAL]] : f32
+// CHECK-NOT: linalg.transpose
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[PAD:.*]][0, 0] {{\[}}%[[HIGH_VAL_1]], %[[HIGH_VAL_2]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[DIM_1:.*]] = tensor.dim %[[DEST]], %[[C2]] : tensor<1x1x?x?xf32>
+// CHECK: %[[DIM_2:.*]] = tensor.dim %[[DEST]], %[[C3]] : tensor<1x1x?x?xf32>
+// CHECK: %[[RES:.*]] = tensor.insert_slice %[[SLICE]] into %[[DEST]][0, 0, 0, 0] [1, 1, %[[DIM_1]], %[[DIM_2]]] [1, 1, 1, 1] : tensor<?x?xf32> into tensor<1x1x?x?xf32>
+// CHECK: return %[[RES]] : tensor<1x1x?x?xf32>
+
// -----
func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{
More information about the Mlir-commits
mailing list