[Mlir-commits] [mlir] [MLIR][Transform] FuseOp: accept transform params, add use_forall argument (PR #161883)

Fri Oct 3 10:16:18 PDT 2025

llvmbot wrote:



@llvm/pr-subscribers-mlir-tensor

@llvm/pr-subscribers-mlir-linalg

Author: Tuomas Kärnä (tkarna)

<details>
<summary>Changes</summary>

Changes to linalg `structured.fuse` transform op:

* Adds an optional `use_forall` boolean argument; when it is true, tiling generates an `scf.forall` loop instead of `scf.for` loops.
* `tile_sizes` can now be any parameter or handle.
* `tile_interchange` can now be any parameter or handle.
* IR assembly format changes: tile sizes are now introduced by the `tile_sizes` keyword
  - from `transform.structured.fuse %0 [4, 8] ...`
  - to `transform.structured.fuse %0 tile_sizes [4, 8] ...`

---

Patch is 41.57 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161883.diff


7 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+50-9) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+155-16) 
- (modified) mlir/python/mlir/dialects/transform/structured.py (+25-11) 
- (modified) mlir/test/Dialect/Linalg/transform-op-fuse.mlir (+71-17) 
- (modified) mlir/test/Dialect/Tensor/tiling.mlir (+1-1) 
- (modified) mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir (+12-12) 
- (modified) mlir/test/python/dialects/transform_structured_ext.py (+33-2) 


``````````diff

diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 0d6ebc087e2f3..40588afa6477a 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -395,31 +395,72 @@ def EliminateLinalgOpAnchoredEmptyTensorsOp
 //===----------------------------------------------------------------------===//
 
 def FuseOp : Op<Transform_Dialect, "structured.fuse",
-    [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
-     DeclareOpInterfaceMethods<TransformOpInterface>,
-     ReportTrackingListenerFailuresOpTrait]> {
+       [AttrSizedOperandSegments,
+        DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
+        TransformOpInterface, ReportTrackingListenerFailuresOpTrait]> {
   let description = [{
     Tiles the operations pointed to by the target handle and fuses their
     producers greedily using the options provided as attributes.
 
     If `apply_cleanup` is true then slice canonicalization is applied between
-    fusion steps.
+    fusion steps. If `use_forall` is true then the tiling method generates a
+    `scf.forall` loop instead of `scf.for` loops.
   }];
 
   let arguments =
     (ins TransformHandleTypeInterface:$target,
-         DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_sizes,
-         DefaultValuedAttr<I64ArrayAttr, "{}">:$tile_interchange,
-         DefaultValuedAttr<BoolAttr, "false">:$apply_cleanup);
+        Variadic<TransformAnyParamTypeOrAnyHandle> : $tile_sizes,
+        Variadic<TransformAnyParamTypeOrAnyHandle> : $tile_interchange,
+        DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_tile_sizes,
+        DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_tile_interchange,
+        DefaultValuedAttr<BoolAttr, "false">:$apply_cleanup,
+        DefaultValuedAttr<BoolAttr, "false">:$use_forall);
   let results = (outs TransformHandleTypeInterface:$transformed,
                       Variadic<TransformHandleTypeInterface>:$loops);
+  let builders = [
+    OpBuilder<(ins "TypeRange":$loopTypes,
+                   "Value":$target,
+                   "ArrayRef<int64_t>":$staticTileSizes,
+                   "ArrayRef<int64_t>":$staticTileInterchange,
+                   CArg<"bool", "false">:$applyCleanup,
+                   CArg<"bool", "false">:$useForall)>,
+    OpBuilder<(ins "TypeRange":$loopTypes,
+                   "Value":$target,
+                   "ArrayRef<OpFoldResult>":$mixedTileSizes,
+                   "ArrayRef<OpFoldResult>":$mixedTileInterchange,
+                   CArg<"bool", "false">:$applyCleanup,
+                   CArg<"bool", "false">:$useForall)>,
+    OpBuilder<(ins "Value":$target,
+                   "ArrayRef<int64_t>":$staticTileSizes,
+                   "ArrayRef<int64_t>":$staticTileInterchange,
+                   CArg<"bool", "false">:$applyCleanup,
+                   CArg<"bool", "false">:$useForall)>,
+    OpBuilder<(ins "Value":$target,
+                   "ArrayRef<OpFoldResult>":$mixedTileSizes,
+                   "ArrayRef<OpFoldResult>":$mixedTileInterchange,
+                   CArg<"bool", "false">:$applyCleanup,
+                   CArg<"bool", "false">:$useForall)>,
+  ];
 
   let assemblyFormat = [{
-    $target ($tile_sizes^)? (`interchange` $tile_interchange^)?
-    (`apply_cleanup` `=` $apply_cleanup^)? attr-dict
+    $target
+    (`tile_sizes` custom<DynamicIndexList>($tile_sizes, $static_tile_sizes)^)?
+    (`interchange` custom<DynamicIndexList>($tile_interchange, $static_tile_interchange)^)?
+    (`apply_cleanup` `=` $apply_cleanup^)?
+    (`use_forall` `=` $use_forall^)? attr-dict
     `:` functional-type(operands, results)
   }];
   let hasVerifier = 1;
+
+  let extraClassDeclaration = [{
+    ::mlir::DiagnosedSilenceableFailure apply(
+        ::mlir::transform::TransformRewriter &rewriter,
+        ::mlir::transform::TransformResults &transformResults,
+        ::mlir::transform::TransformState &state);
+
+    ::mlir::SmallVector<::mlir::OpFoldResult> getMixedTileSizes();
+    ::mlir::SmallVector<::mlir::OpFoldResult> getMixedTileInterchange();
+  }];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index dd9b4c2490ef4..0d365f29a51a3 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -576,6 +576,86 @@ transform::EliminateLinalgOpAnchoredEmptyTensorsOp::apply(
 // FuseOp
 //===----------------------------------------------------------------------===//
 
+void transform::FuseOp::build(OpBuilder &builder, OperationState &result,
+                              TypeRange loopTypes, Value target,
+                              ArrayRef<int64_t> staticTileSizes,
+                              ArrayRef<int64_t> staticTileInterchange,
+                              bool applyCleanup, bool useForall) {
+  return build(
+      builder, result, loopTypes,
+      /*target=*/target,
+      /*mixedTileSizes=*/
+      getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)),
+      /*mixedTileInterchange=*/
+      getAsOpFoldResult(builder.getI64ArrayAttr(staticTileInterchange)),
+      applyCleanup, useForall);
+}
+
+void transform::FuseOp::build(OpBuilder &builder, OperationState &result,
+                              Value target, ArrayRef<int64_t> staticTileSizes,
+                              ArrayRef<int64_t> staticTileInterchange,
+                              bool applyCleanup, bool useForall) {
+  return build(
+      builder, result,
+      /*target=*/target,
+      /*mixedTileSizes=*/
+      getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)),
+      /*mixedTileInterchange=*/
+      getAsOpFoldResult(builder.getI64ArrayAttr(staticTileInterchange)),
+      applyCleanup, useForall);
+}
+
+void transform::FuseOp::build(OpBuilder &builder, OperationState &result,
+                              Value target,
+                              ArrayRef<OpFoldResult> mixedTileSizes,
+                              ArrayRef<OpFoldResult> mixedTileInterchange,
+                              bool applyCleanup, bool useForall) {
+  // Loop types are automatically splat by the callee, setting up one is
+  // enough.
+  SmallVector<Type> loopTypes(1, builder.getType<transform::AnyOpType>());
+  build(builder, result, loopTypes, target, mixedTileSizes,
+        mixedTileInterchange, applyCleanup, useForall);
+}
+
+void transform::FuseOp::build(OpBuilder &builder, OperationState &result,
+                              TypeRange loopTypes, Value target,
+                              ArrayRef<OpFoldResult> mixedTileSizes,
+                              ArrayRef<OpFoldResult> mixedTileInterchange,
+                              bool applyCleanup, bool useForall) {
+  SmallVector<int64_t> staticTileSizes;
+  SmallVector<Value> dynamicTileSizes;
+  dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes);
+  SmallVector<int64_t> staticTileInterchange;
+  SmallVector<Value> dynamicTileInterchange;
+  dispatchIndexOpFoldResults(mixedTileInterchange, dynamicTileInterchange,
+                             staticTileInterchange);
+  // Call the default builder which sets up the proper operands segment sizes
+  // attributes for multiple variadic operands. In the absence of this,
+  // horrible bugs ensue.
+  auto staticTileSizesAttr = builder.getDenseI64ArrayAttr(staticTileSizes);
+  auto staticTileInterchangeAttr =
+      builder.getDenseI64ArrayAttr(staticTileInterchange);
+  unsigned numExpectedLoops =
+      useForall ? 1 : staticTileSizes.size() - llvm::count(staticTileSizes, 0);
+  SmallVector<Type> resultTypes;
+  resultTypes.reserve(numExpectedLoops);
+  assert((loopTypes.size() == 1 || loopTypes.size() == numExpectedLoops) &&
+         "expected one loop type or as many as loops");
+  if (loopTypes.size() == 1)
+    resultTypes.append(numExpectedLoops, loopTypes[0]);
+  else
+    llvm::append_range(resultTypes, loopTypes);
+  build(builder, result, /*transformed=*/target.getType(),
+        /*loops=*/resultTypes,
+        /*target=*/target,
+        /*tile_sizes=*/dynamicTileSizes,
+        /*tile_interchange=*/dynamicTileInterchange,
+        /*static_tile_sizes=*/staticTileSizesAttr,
+        /*static_tile_interchange=*/staticTileInterchangeAttr,
+        /*apply_cleanup=*/applyCleanup,
+        /*use_forall=*/useForall);
+}
+
 /// Apply a tiling transformation to all payload ops and store both the
 /// tiled operation as well as the created tile loops.
 template <typename Range>
@@ -630,13 +710,25 @@ DiagnosedSilenceableFailure
 transform::FuseOp::apply(transform::TransformRewriter &rewriter,
                          mlir::transform::TransformResults &transformResults,
                          mlir::transform::TransformState &state) {
-  SmallVector<int64_t> tileSizes =
-      extractFromIntegerArrayAttr<int64_t>(getTileSizes());
-  SmallVector<int64_t> tileInterchange =
-      extractFromIntegerArrayAttr<int64_t>(getTileInterchange());
+  auto transformOp = cast<TransformOpInterface>(getOperation());
+
+  SmallVector<int64_t> tileSizes;
+  DiagnosedSilenceableFailure status = reifyMixedParamAndHandleResults(
+      state, transformOp, getMixedTileSizes(), tileSizes);
+  if (!status.succeeded())
+    return status;
+  SmallVector<int64_t> tileInterchange;
+  status = reifyMixedParamAndHandleResults(
+      state, transformOp, getMixedTileInterchange(), tileInterchange);
+  if (!status.succeeded())
+    return status;
 
   scf::SCFTilingOptions tilingOptions;
   tilingOptions.interchangeVector = tileInterchange;
+  bool useForall = getUseForall();
+  tilingOptions.setLoopType(useForall
+                                ? scf::SCFTilingOptions::LoopType::ForallOp
+                                : scf::SCFTilingOptions::LoopType::ForOp);
   SmallVector<OpFoldResult> tileSizesOfr =
       getAsIndexOpFoldResult(rewriter.getContext(), tileSizes);
   tilingOptions = tilingOptions.setTileSizes(tileSizesOfr);
@@ -652,9 +744,11 @@ transform::FuseOp::apply(transform::TransformRewriter &rewriter,
     tileAndFuseOptions.cleanupPatterns = std::move(patterns);
   }
 
+  size_t numLoops =
+      useForall ? 1 : tileSizes.size() - llvm::count(tileSizes, 0);
   LogicalResult result = applyTilingToAll(
-      rewriter, getOperation(), state.getPayloadOps(getTarget()),
-      tileSizes.size() - llvm::count(tileSizes, 0), transformResults,
+      rewriter, getOperation(), state.getPayloadOps(getTarget()), numLoops,
+      transformResults,
       [&](TilingInterface tilingInterfaceOp)
           -> FailureOr<scf::SCFTileAndFuseResult> {
         return tileConsumerAndFuseProducersUsingSCF(rewriter, tilingInterfaceOp,
@@ -665,24 +759,69 @@ transform::FuseOp::apply(transform::TransformRewriter &rewriter,
 }
 
 LogicalResult transform::FuseOp::verify() {
-  SmallVector<int64_t> permutation =
-      extractFromIntegerArrayAttr<int64_t>(getTileInterchange());
-  auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, permutation.size()));
-  if (!std::is_permutation(sequence.begin(), sequence.end(),
-                           permutation.begin(), permutation.end())) {
-    return emitOpError() << "expects interchange to be a permutation, found "
-                         << getTileInterchange();
+  ArrayRef<int64_t> permutation = getStaticTileInterchange();
+  if (!llvm::any_of(permutation,
+                    [](int64_t v) { return ShapedType::isDynamic(v); })) {
+    auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, permutation.size()));
+    if (!std::is_permutation(sequence.begin(), sequence.end(),
+                             permutation.begin(), permutation.end())) {
+      return emitOpError() << "expects interchange to be a permutation, found "
+                           << getTileInterchange();
+    }
   }
 
-  SmallVector<int64_t> sizes =
-      extractFromIntegerArrayAttr<int64_t>(getTileSizes());
-  size_t numExpectedLoops = sizes.size() - llvm::count(sizes, 0);
+  ArrayRef<int64_t> sizes = getStaticTileSizes();
+  size_t numExpectedLoops =
+      getUseForall() ? 1 : sizes.size() - llvm::count(sizes, 0);
   if (numExpectedLoops != getNumResults() - 1)
     return emitOpError() << "expects " << numExpectedLoops << " loop results";
 
   return success();
 }
 
+SmallVector<OpFoldResult> transform::FuseOp::getMixedTileSizes() {
+  ValueRange dynamicValues = getTileSizes();
+  ArrayRef<int64_t> staticValues = getStaticTileSizes();
+  SmallVector<OpFoldResult> results;
+  results.reserve(staticValues.size());
+  unsigned dynamicPos = 0;
+  Builder builder(getContext());
+  for (int64_t size : staticValues) {
+    if (size == ShapedType::kDynamic) {
+      results.push_back(dynamicValues[dynamicPos++]);
+    } else {
+      results.push_back(builder.getIndexAttr(size));
+    }
+  }
+  return results;
+}
+
+SmallVector<OpFoldResult> transform::FuseOp::getMixedTileInterchange() {
+  ValueRange dynamicValues = getTileInterchange();
+  ArrayRef<int64_t> staticValues = getStaticTileInterchange();
+  SmallVector<OpFoldResult> results;
+  results.reserve(staticValues.size());
+  unsigned dynamicPos = 0;
+  Builder builder(getContext());
+  for (int64_t size : staticValues) {
+    if (size == ShapedType::kDynamic) {
+      results.push_back(dynamicValues[dynamicPos++]);
+    } else {
+      results.push_back(builder.getIndexAttr(size));
+    }
+  }
+  return results;
+}
+
+void transform::FuseOp::getEffects(
+    SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
+  consumesHandle(getTargetMutable(), effects);
+  onlyReadsHandle(getTileSizesMutable(), effects);
+  onlyReadsHandle(getTileInterchangeMutable(), effects);
+  producesHandle(getOperation()->getOpResults(), effects);
+  modifiesPayload(effects);
+}
+
 //===----------------------------------------------------------------------===//
 // FuseIntoContainingOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/python/mlir/dialects/transform/structured.py b/mlir/python/mlir/dialects/transform/structured.py
index e3bacb5777d9f..d3fe3d5f085bf 100644
--- a/mlir/python/mlir/dialects/transform/structured.py
+++ b/mlir/python/mlir/dialects/transform/structured.py
@@ -144,9 +144,10 @@ def __init__(
         loop_types: Union[Type, Sequence[Type]],
         target: Union[Operation, Value, OpView],
         *,
-        tile_sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        tile_interchange: OptionalIntList = None,
+        tile_sizes: Optional[MixedValues] = None,
+        tile_interchange: Optional[MixedValues] = None,
         apply_cleanup: Optional[bool] = False,
+        use_forall: Optional[bool] = False,
         loc=None,
         ip=None,
     ):
@@ -157,9 +158,10 @@ def __init__(
         self,
         target: Union[Operation, Value, OpView],
         *,
-        tile_sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        tile_interchange: OptionalIntList = None,
+        tile_sizes: Optional[MixedValues] = None,
+        tile_interchange: Optional[MixedValues] = None,
         apply_cleanup: Optional[bool] = False,
+        use_forall: Optional[bool] = False,
         loc=None,
         ip=None,
     ):
@@ -170,17 +172,26 @@ def __init__(
         loop_types_or_target: Union[Type, Sequence[Type], Operation, OpView, Value],
         target_or_none: Optional[Union[Operation, Value, OpView]] = None,
         *,
-        tile_sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        tile_interchange: OptionalIntList = None,
+        tile_sizes: Optional[MixedValues] = None,
+        tile_interchange: Optional[MixedValues] = None,
         apply_cleanup: Optional[bool] = False,
+        use_forall: Optional[bool] = False,
         loc=None,
         ip=None,
     ):
         tile_sizes = tile_sizes if tile_sizes else []
         tile_interchange = tile_interchange if tile_interchange else []
-        _, tile_sizes, _ = _dispatch_dynamic_index_list(tile_sizes)
-        _, tile_interchange, _ = _dispatch_dynamic_index_list(tile_interchange)
-        num_loops = sum(0 if v == 0 else 1 for v in tile_sizes)
+        (
+            dynamic_tile_sizes,
+            static_tile_sizes,
+            _,
+        ) = _dispatch_dynamic_index_list(tile_sizes)
+        (
+            dynamic_tile_interchange,
+            static_tile_interchange,
+            _,
+        ) = _dispatch_dynamic_index_list(tile_interchange)
+        num_loops = 1 if use_forall else sum(0 if v == 0 else 1 for v in static_tile_sizes)
 
         if isinstance(loop_types_or_target, (Operation, Value, OpView)):
             loop_types = [transform.AnyOpType.get()] * num_loops
@@ -197,9 +208,12 @@ def __init__(
             target.type,
             loop_types,
             target,
-            tile_sizes=tile_sizes,
-            tile_interchange=tile_interchange,
+            tile_sizes=dynamic_tile_sizes,
+            tile_interchange=dynamic_tile_interchange,
+            static_tile_sizes=static_tile_sizes,
+            static_tile_interchange=static_tile_interchange,
             apply_cleanup=apply_cleanup,
+            use_forall=use_forall,
             loc=loc,
             ip=ip,
         )
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index 9a44f95afb586..d472f75bfcb9a 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -18,7 +18,7 @@ func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
+    %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1]
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
       transform.yield
   }
@@ -48,7 +48,7 @@ func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
+    %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1]
       : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
     transform.loop.peel %loops#0 : (!transform.op<"scf.for">) -> (!transform.any_op, !transform.any_op)
     transform.yield
@@ -57,6 +57,60 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: func.func @fuse_unary_param
+func.func @fuse_unary_param(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
+
+  //     CHECK: %[[RES:.*]] = scf.for
+  //     CHECK:    scf.for
+  //     CHECK:       linalg.exp
+  //     CHECK:       linalg.add
+  //     CHECK: return %[[RES]]
+  %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
+                             outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
+  %1 = linalg.add ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
+                             outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %1 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.param.constant 32 : i32 -> !transform.param<i32>
+    %2 = transform.param.constant 1 : i32 -> !transform.param<i32>
+    %3, %loops:2 = transform.structured.fuse %0 tile_sizes [%1, 32] interchange [0, %2]
+      : (!transform.any_op, !transform.param<i32>, !transform.param<i32>) ->
+      (!transfor...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/161883