[Mlir-commits] [mlir] [MLIR] Add allow Insert/extract slice option to pack/unpack op (PR #117340)
Zhuoran Yin
llvmlistbot at llvm.org
Mon Dec 9 14:07:56 PST 2024
================
@@ -0,0 +1,238 @@
+// RUN: mlir-opt %s --transform-interpreter --split-input-file -canonicalize | FileCheck %s
+
+// For the pack op, we set lowerPadLikeWithInsertSlice = false so that no insert_slice is generated.
+// This allows linalg.transpose to be fused as a producer operation. Without this attribute,
+// an insert_slice is generated instead and the fusion is blocked.
+
+module {
+ // Verifies that, after lowering tensor.pack with lowerPadLikeWithInsertSlice = false,
+ // the resulting linalg.transpose is fused into the scf.forall produced by tiling
+ // the consumer linalg.generic.
+ // CHECK-LABEL: func @fuse_pack_as_producer
+ // CHECK: scf.forall {{.*}} {
+ // CHECK: linalg.transpose
+ // CHECK: linalg.generic
+ // CHECK: scf.forall.in_parallel
+ // CHECK: }
+ func.func @fuse_pack_as_producer(%src: tensor<128x256xf32>, %other: tensor<4x4x128x256xf32>)
+ -> tensor<4x4x128x256xf32> {
+ // Pack the whole 128x256 source into a single 1x1 outer tile.
+ %dest = tensor.empty() : tensor<1x1x128x256xf32>
+ %pack = tensor.pack %src inner_dims_pos = [0, 1] inner_tiles = [128, 256]
+ into %dest : tensor<128x256xf32> -> tensor<1x1x128x256xf32>
+
+ // Consumer: adds the packed tile (indexed at (0, 0, k, l) for every (i, j)) to %other.
+ %out = tensor.empty() : tensor<4x4x128x256xf32>
+ %res = linalg.generic
+ {indexing_maps = [affine_map<(i, j, k, l) -> (0, 0, k, l)>,
+ affine_map<(i, j, k, l) -> (i, j, k, l)>,
+ affine_map<(i, j, k, l) -> (i, j, k, l)>],
+ iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
+ ins(%pack, %other: tensor<1x1x128x256xf32>, tensor<4x4x128x256xf32>)
+ outs(%out: tensor<4x4x128x256xf32>) {
+ ^bb0(%pack_elem: f32, %other_elem: f32, %out_elem: f32):
+ %r = arith.addf %pack_elem, %other_elem : f32
+ linalg.yield %r : f32
+ } -> tensor<4x4x128x256xf32>
+
+ return %res : tensor<4x4x128x256xf32>
+ }
+
+ module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ // Find and lower pack operation.
+ %pack = transform.structured.match ops{["tensor.pack"]} in %arg1
+ : (!transform.any_op) -> !transform.op<"tensor.pack">
+ // Lower without insert_slice so that a linalg.transpose handle is available for fusion.
+ %padded, %expanded, %transpose = transform.structured.lower_pack %pack {lowerPadLikeWithInsertSlice = false}
+ : (!transform.op<"tensor.pack">)
+ -> (!transform.op<"tensor.pad">,
+ !transform.op<"tensor.expand_shape">,
+ !transform.op<"linalg.transpose">)
+
+ %root = transform.structured.match ops{["linalg.generic"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ // Tile the linalg operation into a parallel scf.forall loop with num_threads [4, 4].
+ %tiled_op, %forall_op = transform.structured.tile_using_forall %root num_threads [4, 4]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+
+ // Fuse the transpose operation into the tiled loop.
+ transform.structured.fuse_into_containing_op %transpose into %forall_op
+ : (!transform.op<"linalg.transpose">, !transform.any_op) -> (!transform.any_op, !transform.any_op)
+ transform.yield
+ }
+ }
+}
+
+// -----
+// For pack op, by default lowerPadLikeWithInsertSlice = true, which generates insert_slice and blocks fusion.
+
+module {
+ // CHECK-LABEL: func @fuse_pack_as_producer_blocked_by_insert_slice
+ // CHECK: tensor.insert_slice
+ // CHECK: scf.forall {{.*}} {
+ // CHECK: scf.forall.in_parallel
----------------
jerryyin wrote:
Good point!
This is addressed in the latest commit. I'll leave this review open for a day or two in case you've seen anything else worth mentioning.
https://github.com/llvm/llvm-project/pull/117340
More information about the Mlir-commits
mailing list