[mlir] [llvm] [mlir][tensor] Fold consumer linalg transpose with producer tensor pack (PR #74206)
Han-Chung Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 13 13:34:17 PST 2023
================
@@ -114,3 +114,237 @@ func.func @pad_pack_different_padding_value(%src: tensor<16641x16xf32>) -> tenso
// CHECK-LABEL: func.func @pad_pack_different_padding_value
// CHECK: tensor.pad
// CHECK: tensor.pack
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x57x56x2x32xf32> {
+ %0 = tensor.empty() : tensor<56x2x1x57x32xf32>
+ %pack = tensor.pack %arg0
+ outer_dims_perm = [0, 3, 2, 1]
+ inner_dims_pos = [3]
+ inner_tiles = [32]
+ into %0 : tensor<56x57x1x64xf32> -> tensor<56x2x1x57x32xf32>
+
+ %1 = tensor.empty() : tensor<1x57x56x2x32xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<56x2x1x57x32xf32>)
+ outs(%1 : tensor<1x57x56x2x32xf32>)
+ permutation = [2, 3, 0, 1, 4]
+ return %transposed : tensor<1x57x56x2x32xf32>
+}
+// CHECK: func @tensor_pack_linalg_transpose_fold(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>)
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
+// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
+// CHECK-SAME: into %[[INIT]]
+// CHECK: return %[[PACK]]
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_with_padding(%arg0: tensor<56x57x1x55xf32>, %padding: f32) -> tensor<1x57x56x2x32xf32> {
+ %0 = tensor.empty() : tensor<56x2x1x57x32xf32>
+ %pack = tensor.pack %arg0 padding_value(%padding : f32)
+ outer_dims_perm = [0, 3, 2, 1]
+ inner_dims_pos = [3]
+ inner_tiles = [32]
+ into %0 : tensor<56x57x1x55xf32> -> tensor<56x2x1x57x32xf32>
+
+ %1 = tensor.empty() : tensor<1x57x56x2x32xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<56x2x1x57x32xf32>)
+ outs(%1 : tensor<1x57x56x2x32xf32>)
+ permutation = [2, 3, 0, 1, 4]
+ return %transposed : tensor<1x57x56x2x32xf32>
+}
+// CHECK: func @tensor_pack_linalg_transpose_fold_with_padding(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x55xf32>, %[[PADDING:.+]]: f32)
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x57x56x2x32xf32>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] padding_value(%[[PADDING]] : f32)
+// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
+// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
+// CHECK-SAME: into %[[INIT]]
+// CHECK: return %[[PACK]]
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(%arg0: tensor<56x57x1x64xf32>) -> tensor<1x2x56x57x32xf32> {
+ %0 = tensor.empty() : tensor<56x57x1x2x32xf32>
+ %pack = tensor.pack %arg0
+ inner_dims_pos = [3]
+ inner_tiles = [32]
+ into %0 : tensor<56x57x1x64xf32> -> tensor<56x57x1x2x32xf32>
+
+ %1 = tensor.empty() : tensor<1x2x56x57x32xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<56x57x1x2x32xf32>)
+ outs(%1 : tensor<1x2x56x57x32xf32>)
+ permutation = [2, 3, 0, 1, 4]
+ return %transposed : tensor<1x2x56x57x32xf32>
+}
+// CHECK: func @tensor_pack_linalg_transpose_fold_no_outer_dims_perm(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x57x1x64xf32>)
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x2x56x57x32xf32>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 3, 0, 1]
+// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
+// CHECK-SAME: into %[[INIT]]
+// CHECK: return %[[PACK]]
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<12x56x4x9x32x8x2xf32> {
+ %0 = tensor.empty() : tensor<4x9x12x56x8x2x32xf32>
+ %pack = tensor.pack %arg0
+ outer_dims_perm = [3, 1, 2, 0]
+ inner_dims_pos = [1, 2, 3]
+ inner_tiles = [8, 2, 32]
+ into %0 : tensor<56x72x24x128xf32> -> tensor<4x9x12x56x8x2x32xf32>
+
+ %1 = tensor.empty() : tensor<12x56x4x9x32x8x2xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<4x9x12x56x8x2x32xf32>)
+ outs(%1 : tensor<12x56x4x9x32x8x2xf32>)
+ permutation = [2, 3, 0, 1, 6, 4, 5]
+ return %transposed : tensor<12x56x4x9x32x8x2xf32>
+}
+// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_transpose(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>)
+// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<12x56x4x9x32x8x2xf32>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 0, 3, 1]
+// CHECK-SAME: inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2]
+// CHECK-SAME: into %[[INIT]]
+// CHECK: return %[[PACK]]
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(%arg0: tensor<56x72x24x128xf32>) -> tensor<9x56x2x12x32x8x4xf32> {
+ %0 = tensor.empty() : tensor<4x12x9x56x8x2x32xf32>
+ %pack = tensor.pack %arg0
+ outer_dims_perm = [3, 2, 1, 0]
+ inner_dims_pos = [1, 2, 3]
+ inner_tiles = [8, 2, 32]
+ into %0 : tensor<56x72x24x128xf32> -> tensor<4x12x9x56x8x2x32xf32>
+
+ %1 = tensor.empty() : tensor<9x56x2x12x32x8x4xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<4x12x9x56x8x2x32xf32>)
+ outs(%1 : tensor<9x56x2x12x32x8x4xf32>)
+ permutation = [2, 3, 5, 1, 6, 4, 0]
+ return %transposed : tensor<9x56x2x12x32x8x4xf32>
+}
+// CHECK: func @tensor_pack_linalg_transpose_fold_tile_dims_outer_dims_transpose(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x72x24x128xf32>)
+// CHECK: tensor.pack
+// CHECK: linalg.transpose
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(%arg0: tensor<56x?x?x64xf32>) -> tensor<?x?x56x2x32xf32> {
+ %0 = tensor.empty() : tensor<56x2x1x57x32xf32>
+ %pack = tensor.pack %arg0
+ outer_dims_perm = [0, 3, 2, 1]
+ inner_dims_pos = [3]
+ inner_tiles = [32]
+ into %0 : tensor<56x?x?x64xf32> -> tensor<56x2x1x57x32xf32>
+
+ %1 = tensor.empty() : tensor<1x57x56x2x32xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<56x2x1x57x32xf32>)
+ outs(%1 : tensor<1x57x56x2x32xf32>)
+ permutation = [2, 3, 0, 1, 4]
+
+ %return_value = tensor.cast %transposed : tensor<1x57x56x2x32xf32> to tensor<?x?x56x2x32xf32>
+ return %return_value : tensor<?x?x56x2x32xf32>
+}
+// CHECK: func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x64xf32>)
+// CHECK: %[[c1:.+]] = arith.constant 1 : index
+// CHECK: %[[c2:.+]] = arith.constant 2 : index
+// CHECK: %[[dim:.+]] = tensor.dim %[[ARG0]], %[[c1]] : tensor<56x?x?x64xf32>
+// CHECK: %[[dim_0:.+]] = tensor.dim %[[ARG0]], %[[c2]] : tensor<56x?x?x64xf32>
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[dim_0]], %[[dim]]) : tensor<?x?x56x2x32xf32>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]]
+// CHECK-SAME: outer_dims_perm = [2, 1, 0, 3]
+// CHECK-SAME: inner_dims_pos = [3] inner_tiles = [32]
+// CHECK-SAME: into %[[INIT]]
+// CHECK: return %[[PACK]]
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(%arg0: tensor<56x?x?x128xf32>) -> tensor<?x?x56x9x32x8x2xf32> {
+ %0 = tensor.empty() : tensor<56x9x12x4x8x2x32xf32>
+ %pack = tensor.pack %arg0
+ inner_dims_pos = [1, 2, 3]
+ inner_tiles = [8, 2, 32]
+ into %0 : tensor<56x?x?x128xf32> -> tensor<56x9x12x4x8x2x32xf32>
+
+ %1 = tensor.empty() : tensor<12x4x56x9x32x8x2xf32>
+ %transposed = linalg.transpose
+ ins(%pack : tensor<56x9x12x4x8x2x32xf32>)
+ outs(%1 : tensor<12x4x56x9x32x8x2xf32>)
+ permutation = [2, 3, 0, 1, 6, 4, 5]
+
+ %return_value = tensor.cast %transposed : tensor<12x4x56x9x32x8x2xf32> to tensor<?x?x56x9x32x8x2xf32>
+ return %return_value : tensor<?x?x56x9x32x8x2xf32>
+}
+// CHECK: #[[map:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+// CHECK: #[[map1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)>
+// CHECK: module {
+// CHECK: func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_and_tile_dims(
+// CHECK-SAME: %[[ARG0:.+]]: tensor<56x?x?x128xf32>)
+// CHECK: %[[c1:.+]] = arith.constant 1 : index
+// CHECK: %[[c2:.+]] = arith.constant 2 : index
+// CHECK: %[[dim:.+]] = tensor.dim %[[ARG0]], %[[c1]] : tensor<56x?x?x128xf32>
+// CHECK: %[[dim_0:.+]] = tensor.dim %[[ARG0]], %[[c2]] : tensor<56x?x?x128xf32>
+// CHECK: %[[mapped_dim1:.+]] = affine.apply #[[map]]()[%[[dim]]]
+// CHECK: %[[mapped_dim2:.+]] = affine.apply #[[map1]]()[%[[dim_0]]]
+// CHECK: %[[INIT:.+]] = tensor.empty(%[[mapped_dim2]], %[[mapped_dim1]]) : tensor<?x4x56x?x32x8x2xf32>
+// CHECK: %[[PACK:.+]] = tensor.pack %[[ARG0]] outer_dims_perm = [2, 3, 0, 1] inner_dims_pos = [3, 1, 2] inner_tiles = [32, 8, 2] into %[[INIT]] : tensor<56x?x?x128xf32> -> tensor<?x4x56x?x32x8x2xf32>
+// CHECK: %[[CAST:.+]] = tensor.cast %[[PACK]] : tensor<?x4x56x?x32x8x2xf32> to tensor<?x?x56x9x32x8x2xf32>
+// CHECK: return %[[CAST]] : tensor<?x?x56x9x32x8x2xf32>
+// CHECK: }
+
+// -----
+
+func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
+ %0 = tensor.empty() : tensor<56x9x12x4x8x2x32xf32>
+ %cast1 = tensor.cast %0 : tensor<56x9x12x4x8x2x32xf32> to tensor<?x?x?x?x?x?x?xf32>
+ %pack = tensor.pack %arg0
+ outer_dims_perm = [3, 0, 2, 1]
+ inner_dims_pos = [1, 2, 3]
+ inner_tiles = [%tile_p, %tile_q, %tile_r]
+ into %cast1 : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
----------------
hanhanW wrote:
Can we simplify the test? I think we can remove the cast op and just pass the destination tensor in through the function arguments, e.g.:
```suggestion
func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(%arg0: tensor<?x?x?x?xf32>, %dest: tensor<?x?x?x?x?x?x?xf32>, %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
%pack = tensor.pack %arg0
outer_dims_perm = [3, 0, 2, 1]
inner_dims_pos = [1, 2, 3]
inner_tiles = [%tile_p, %tile_q, %tile_r]
into %dest : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
```
The same applies to the transpose init tensor; see the sketch below.
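For illustration, a minimal sketch of how the fully simplified test could look once both destinations are threaded through as function arguments. The `%dest`/`%init` argument names and the transpose permutation here are illustrative, not taken from the PR:
```mlir
// Sketch only: the pack destination and the transpose init are passed in as
// function arguments, so no tensor.empty/tensor.cast ops are needed.
func.func @tensor_pack_linalg_transpose_fold_dynamic_outer_dims_tile_dims_tile_sizes(
    %arg0: tensor<?x?x?x?xf32>, %dest: tensor<?x?x?x?x?x?x?xf32>,
    %init: tensor<?x?x?x?x?x?x?xf32>,
    %tile_p : index, %tile_q : index, %tile_r : index) -> tensor<?x?x?x?x?x?x?xf32> {
  %pack = tensor.pack %arg0
    outer_dims_perm = [3, 0, 2, 1]
    inner_dims_pos = [1, 2, 3]
    inner_tiles = [%tile_p, %tile_q, %tile_r]
    into %dest : tensor<?x?x?x?xf32> -> tensor<?x?x?x?x?x?x?xf32>
  // Illustrative permutation; the one in the actual test may differ.
  %transposed = linalg.transpose
    ins(%pack : tensor<?x?x?x?x?x?x?xf32>)
    outs(%init : tensor<?x?x?x?x?x?x?xf32>)
    permutation = [2, 3, 0, 1, 6, 4, 5]
  return %transposed : tensor<?x?x?x?x?x?x?xf32>
}
```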
https://github.com/llvm/llvm-project/pull/74206