[Mlir-commits] [mlir] 44cfea0 - [mlir][Linalg] Retire LinalgStrategyTilePass and filter-based pattern.
Nicolas Vasilache
llvmlistbot at llvm.org
Tue Oct 11 02:43:06 PDT 2022
Author: Nicolas Vasilache
Date: 2022-10-11T02:42:56-07:00
New Revision: 44cfea0279a4fb9ea8cb0c68a2b5ee7a81654071
URL: https://github.com/llvm/llvm-project/commit/44cfea0279a4fb9ea8cb0c68a2b5ee7a81654071
DIFF: https://github.com/llvm/llvm-project/commit/44cfea0279a4fb9ea8cb0c68a2b5ee7a81654071.diff
LOG: [mlir][Linalg] Retire LinalgStrategyTilePass and filter-based pattern.
Context: https://discourse.llvm.org/t/psa-retire-linalg-filter-based-patterns/63785
Uses of `LinalgTilingPattern::returningMatchAndRewrite` are replaced by a top-level `tileWithLinalgTilingOptions` function that is marked obsolete and serves
as a temporary means to transition away from `LinalgTilingOptions`-based tiling.
`LinalgTilingOptions` bundles too many options, which have since been orthogonalized into separate operations in the transform dialect.
Additionally, the revision introduces a `transform.structured.tile_to_scf_for` structured transform operation that is needed to properly tile `tensor.pad`
via the TilingInterface. Uses of `transform.structured.tile` will be deprecated and replaced by this new op.
This will eventually allow `linalg::tileLinalgOp` to be deprecated.
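A minimal sketch of the intended usage, assuming a `tensor.pad` payload op (the op name and tile sizes here are illustrative; see the updated tests in this revision for authoritative schedules):

  transform.sequence failures(propagate) {
  ^bb0(%arg1: !pdl.operation):
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
    // Tiling with sizes [2, 3] yields the tiled op plus one loop handle per
    // statically non-zero tile size.
    %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3]
  }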
Context: https://discourse.llvm.org/t/psa-retire-tileandfuselinalgops-method/63850
During the transition, tests that performed tile-and-distribute on tensors are retired: these transformations should be better orthogonalized in the future.
In particular, tiling to specific loop types and the tileAndDistribute behavior are not available via the transform ops.
The behavior remains available through the `tileWithLinalgTilingOptions` function so that downstream clients can transition without breakage, but it is meant to be retired soon.
As more tests were ported to the transform dialect, it became necessary to introduce a test-transform-dialect-erase-schedule-pass that discards the transform specification once it has been applied, so that end-to-end lowering and execution remain possible.
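An end-to-end integration test then uses a pipeline of roughly this shape (the flag spelling is inferred from the pass name above, and the remaining lowering passes are elided):

  // RUN: mlir-opt %s -test-transform-dialect-interpreter \
  // RUN:   -test-transform-dialect-erase-schedule <lowering passes> | \
  // RUN:   mlir-cpu-runner ...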
Lastly, a number of redundant tests that exercised compositions of patterns are retired, since the same coverage is available through a better mechanism in the transform dialect.
Differential Revision: https://reviews.llvm.org/D135573
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Passes.h
mlir/include/mlir/Dialect/Linalg/Passes.td
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
mlir/test/Dialect/Linalg/tile-conv.mlir
mlir/test/Dialect/Linalg/tile-indexed.mlir
mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
mlir/test/Dialect/Linalg/tile-tensors.mlir
mlir/test/Dialect/Linalg/transform-patterns.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
mlir/tools/mlir-opt/mlir-opt.cpp
Removed:
mlir/test/Dialect/Linalg/tile-and-distribute.mlir
mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir
mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir
mlir/test/Dialect/Linalg/tile-parallel.mlir
mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
mlir/test/Dialect/Linalg/tile-zero.mlir
mlir/test/Dialect/Linalg/tile.mlir
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
index 6e41f05cc36d3..40ca0277a13d9 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -38,11 +38,6 @@ std::unique_ptr<Pass> createFoldReshapeOpsByLinearizationPass();
std::unique_ptr<Pass> createLinalgNamedOpConversionPass();
-std::unique_ptr<OperationPass<func::FuncOp>>
-createLinalgTilingPass(ArrayRef<int64_t> tileSizes = {},
- linalg::LinalgTilingLoopType loopType =
- linalg::LinalgTilingLoopType::Loops);
-
std::unique_ptr<OperationPass<func::FuncOp>>
createLinalgInlineScalarOperandsPass();
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index 40a2f112f0a15..73fd30bbf8749 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -102,22 +102,6 @@ def LinalgBufferize : Pass<"linalg-bufferize", "func::FuncOp"> {
];
}
-def LinalgTilingPass : Pass<"linalg-tile", "func::FuncOp"> {
- let summary = "Tile operations in the linalg dialect";
- let constructor = "mlir::createLinalgTilingPass()";
- let dependentDialects = [
- "AffineDialect",
- "linalg::LinalgDialect",
- "memref::MemRefDialect",
- "scf::SCFDialect"
- ];
- let options = [
- ListOption<"tileSizes", "tile-sizes", "int64_t", "Tile sizes">,
- Option<"loopType", "loop-type", "std::string", /*default=*/"\"for\"",
- "Specify the type of loops to generate: for, parallel">
- ];
-}
-
def LinalgGeneralization : Pass<"linalg-generalize-named-ops", "func::FuncOp"> {
let summary = "Convert named ops into generic ops";
let constructor = "mlir::createLinalgGeneralizationPass()";
@@ -162,19 +146,6 @@ def LinalgDetensorize : Pass<"linalg-detensorize", ""> {
];
}
-def LinalgStrategyTilePass
- : Pass<"linalg-strategy-tile-pass", "func::FuncOp"> {
- let summary = "Configurable pass to apply pattern-based linalg tiling.";
- let constructor = "mlir::createLinalgStrategyTilePass()";
- let dependentDialects = ["linalg::LinalgDialect"];
- let options = [
- Option<"anchorFuncName", "anchor-func", "std::string", /*default=*/"",
- "Which func op is the anchor to latch on.">,
- Option<"anchorOpName", "anchor-op", "std::string", /*default=*/"",
- "Which linalg op within the func is the anchor to latch on.">,
- ];
-}
-
def LinalgStrategyRemoveMarkersPass
: Pass<"linalg-strategy-remove-markers-pass", "func::FuncOp"> {
let summary = "Cleanup pass that drops markers.";
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 491c5a8aed06e..be4efaafc6ca9 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -751,6 +751,63 @@ def TileToForeachThreadOp :
}];
}
+def TileToScfForOp : Op<Transform_Dialect, "structured.tile_to_scf_for",
+ [DeclareOpInterfaceMethods<TransformOpInterface>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+ let description = [{
+ Indicates that the given `target` op should be tiled with the given sizes.
+ This transform generates a loop nest with a smaller ("tiled") target
+ operation in its body. The target must implement TilingInterface.
+
+ Tile sizes may be known at transformation time, in which case they are
+ expected to be provided in the `static_sizes` attribute, or not, in which
+ case the tile value must be computed by the payload IR and the handle to the
+ operation computing it must be provided through `dynamic_sizes`. When the
+ sizes are not known statically, the corresponding entry in the
+ `static_sizes` attribute must be set to `ShapedType::kDynamicSize`. Only
+ the dynamic sizes must be provided in `dynamic_sizes`, i.e., there should
+ be as many handles as `ShapedType::kDynamicSize` values in the
+ `static_sizes` attribute. A static size of `0` indicates that the dimension
+ should not be tiled. No loop will be generated for such dimensions. If all
+ tile sizes are `0`, this transform is effectively a no-op.
+
+ This op returns handles to the tiled op (in the generated loop nest) and the
+ generated loops. The number of loops is the number of tile sizes that are
+ statically known to be non-zero.
+
+ #### Return modes
+
+ On success, the resulting handles are associated with co-indexed lists of
+ tiled operations and loops around them.
+
+ This operation only supports TilingInterface ops and produces a silenceable
+ failure if the input contains any non-TilingInterface ops. The ops preceding
+ it in the list associated with the `target` handle will have been tiled.
+
+ This operation produces a silenceable failure if the `dynamic_sizes` handles
+ are associated with lists of payload operations of a size different than
+ that of the list associated with the `target` handle.
+
+ If the internal implementation of tiling for any of the operations fails, this
+ op produces a definite failure.
+ }];
+
+ let arguments = (ins PDL_Operation:$target,
+ Variadic<PDL_Operation>:$dynamic_sizes,
+ DefaultValuedAttr<I64ArrayAttr, "{}">:$static_sizes,
+ DefaultValuedAttr<I64ArrayAttr, "{}">:$interchange);
+ let results = (outs PDL_Operation:$tiled_linalg_op,
+ Variadic<PDL_Operation>:$loops);
+
+ let hasCustomAssemblyFormat = 1;
+
+ let extraClassDeclaration = [{
+ /// Returns the list of tile sizes, which may be static (Attribute) or
+ /// dynamic (Value).
+ SmallVector<OpFoldResult> getMixedSizes();
+ }];
+}
+
def VectorizeOp : Op<Transform_Dialect, "structured.vectorize",
[FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformEachOpTrait, TransformOpInterface]> {
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
index d7c0d22031692..ae3df323dd5c1 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h
@@ -30,41 +30,8 @@ struct Transformation {
LinalgTransformationFilter::FilterFunction filter = nullptr;
};
-/// Represent one application of LinalgStrategyTilePass.
-struct Tile : public Transformation {
- Tile(StringRef name, linalg::LinalgTilingOptions options,
- LinalgTransformationFilter::FilterFunction f = nullptr)
- : Transformation(std::move(f)), opName(name),
- options(std::move(options)) {}
-
- void addToPassPipeline(OpPassManager &pm,
- LinalgTransformationFilter m) const override {
- pm.addPass(createLinalgStrategyTilePass(opName, options, m));
- }
-
-private:
- std::string opName;
- linalg::LinalgTilingOptions options;
-};
-
/// Codegen strategy controls how a Linalg op is progressively lowered.
struct CodegenStrategy {
- /// Append a pattern to add a level of tiling for Op `opName` with tiling
- /// `options`.
- CodegenStrategy &
- tile(StringRef opName, const linalg::LinalgTilingOptions &options,
- const LinalgTransformationFilter::FilterFunction &f = nullptr) {
- transformationSequence.emplace_back(
- std::make_unique<Tile>(opName, options, f));
- return *this;
- }
- /// Conditionally append a pattern to add a level of tiling for
- /// `LinalgOpType` with tiling `options`.
- CodegenStrategy &
- tileIf(bool b, StringRef opName, linalg::LinalgTilingOptions options,
- LinalgTransformationFilter::FilterFunction f = nullptr) {
- return b ? tile(opName, std::move(options), std::move(f)) : *this;
- }
/// Configure the post staged-patterns global enabling passes options.
CodegenStrategy &
setVectorTransferToSCFOptions(LinalgEnablingOptions options) {
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index fb37c6f227728..044ce8dbdbb76 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -696,57 +696,26 @@ struct LinalgTilingOptions {
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx);
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns);
-///
-/// Linalg tiling pattern.
-///
-/// Apply the `tiling` transformation as a pattern.
-/// `filter` controls LinalgTransformMarker matching and update when specified.
-/// See `tiling` for more details.
-// TODO: TiledOpInterface
-struct LinalgTilingPattern : public OpInterfaceRewritePattern<LinalgOp> {
- /// Construct a generic pattern applied to all LinalgOp that verify `filter`.
- LinalgTilingPattern(
- MLIRContext *context, LinalgTilingOptions options,
- LinalgTransformationFilter f = LinalgTransformationFilter(),
- PatternBenefit benefit = 1);
-
- /// Construct a pattern specifically applied to `opName`.
- LinalgTilingPattern(
- StringRef opName, MLIRContext *context, LinalgTilingOptions options,
- LinalgTransformationFilter f = LinalgTransformationFilter(),
- PatternBenefit benefit = 1);
-
- /// `matchAndRewrite` implementation that returns the significant transformed
- /// pieces of IR.
- FailureOr<TiledLinalgOp>
- returningMatchAndRewrite(LinalgOp op, PatternRewriter &rewriter) const;
-
- LogicalResult matchAndRewrite(LinalgOp op,
- PatternRewriter &rewriter) const override {
- return returningMatchAndRewrite(op, rewriter);
- }
-
-private:
- /// LinalgTransformMarker handles special attribute manipulations.
- LinalgTransformationFilter filter;
- /// Options to control tiling;
- LinalgTilingOptions options;
-};
+/// Perform tiling using LinalgTilingOptions.
+/// Note: this function is on a path to deprecation and only works on LinalgOp.
+/// Clients should favor using `tileUsingSCFForOp`, which works more generally
+/// on TilingInterface.
+FailureOr<TiledLinalgOp>
+tileWithLinalgTilingOptions(RewriterBase &rewriter, LinalgOp op,
+ const LinalgTilingOptions &options);
///
/// Linalg padding pattern.
///
/// Apply the `padding` transformation as a pattern.
-/// `filter` controls LinalgTransformMarker matching and update when specified.
/// See `padding` for more details.
struct LinalgPaddingPattern : public OpInterfaceRewritePattern<LinalgOp> {
- /// Construct a generic pattern applied to all LinalgOp that verify `filter`.
LinalgPaddingPattern(MLIRContext *context,
LinalgPaddingOptions options = LinalgPaddingOptions(),
PatternBenefit benefit = 1);
- /// `matchAndRewrite` implementation that returns the significant transformed
- /// pieces of IR.
+ /// `matchAndRewrite` implementation that returns the significant
+ /// transformed pieces of IR.
FailureOr<LinalgOp> returningMatchAndRewrite(LinalgOp op,
PatternRewriter &rewriter) const;
@@ -954,9 +923,9 @@ void populateLinalgNamedOpsGeneralizationPatterns(
/// Linalg decompose convolutions patterns
-/// Populates patterns to decompose high-D convolution ops into low-D ones. This
-/// is a step in progressive lowering for convolution ops, afterwards we can
-/// vectorize the low-D convolution ops.
+/// Populates patterns to decompose high-D convolution ops into low-D ones.
+/// This is a step in progressive lowering for convolution ops, afterwards we
+/// can vectorize the low-D convolution ops.
void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
PatternBenefit benefit = 1);
@@ -977,8 +946,8 @@ struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
/// a static bounding box. Use `paddingValues` and `packPaddings` to set padding
/// value and nofold attribute of the created tensor::PadOps, respectively.
/// Update `paddedOp` to the cloned operation with statically shaped
-/// `paddingDimensions` and return the extracted dynamically shaped results. If
-/// padding fails, return failure.
+/// `paddingDimensions` and return the extracted dynamically shaped results.
+/// If padding fails, return failure.
FailureOr<SmallVector<Value>>
rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
ArrayRef<int64_t> paddingDimensions,
@@ -1132,29 +1101,6 @@ class VectorizationPatterns<> {
const LinalgTransformationFilter &f) {}
};
-template <typename... OpTypes>
-class TilingPatterns;
-
-template <>
-class TilingPatterns<> {
-public:
- static void insert(RewritePatternSet &patterns,
- const LinalgTilingOptions &options,
- const LinalgTransformationFilter &f) {}
-};
-
-template <typename OpTy, typename... OpTypes>
-class TilingPatterns<OpTy, OpTypes...> {
-public:
- static void insert(RewritePatternSet &patterns,
- const LinalgTilingOptions &options,
- const LinalgTransformationFilter &f) {
- patterns.add<LinalgTilingPattern>(OpTy::getOperationName(),
- patterns.getContext(), options, f);
- TilingPatterns<OpTypes...>::insert(patterns, options, f);
- }
-};
-
/// Split Reduction options.
struct SplitReductionOptions {
// Ratio used to split the reduction dimension. If the ratio is <= 1, nothing
@@ -1181,8 +1127,10 @@ void populateSplitReductionPattern(
/// Apply transformation to split the single linalg op reduction into a parallel
/// and reduction dimension. Then create a new linalg.generic op doing the rest
-/// of the reduction. Return the new linalg op with an extra parallel dimension
-/// or failure if the transformation didn't happen.
+/// of the reduction.
+/// Return the new linalg op with an extra parallel dimension or failure if the
+/// transformation didn't happen.
+///
/// Example:
/// ```
/// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
@@ -1265,7 +1213,7 @@ splitReduction(PatternRewriter &b, LinalgOp op,
/// %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
/// iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
/// ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-/// outs(%1 : tensor<16x32x64xf32>) {
+/// outs(%1 : tensor<16x32x64xf32>) {
/// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
/// %5 = arith.mulf %arg3, %arg4 : f32
/// %6 = arith.addf %arg6, %5 : f32
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 5b825201ebcb2..ed74de7f61f58 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -37,6 +37,16 @@ static SmallVector<unsigned> extractUIntArray(ArrayAttr attr) {
return result;
}
+/// Extracts a vector of int64_t from an array attribute. Asserts if the
+/// attribute contains values other than integers.
+static SmallVector<int64_t> extractI64Array(ArrayAttr attr) {
+ SmallVector<int64_t> result;
+ result.reserve(attr.size());
+ for (APInt value : attr.getAsValueRange<IntegerAttr>())
+ result.push_back(value.getSExtValue());
+ return result;
+}
+
namespace {
/// A simple pattern rewriter that implements no special logic.
class SimpleRewriter : public PatternRewriter {
@@ -858,11 +868,10 @@ transform::ScalarizeOp::applyToOne(linalg::LinalgOp target,
// Tiling with "scalarize_dyn_dims" actually sets the same lambda as the
// tile sizes and asserts that it is not already set.
SmallVector<int64_t> emptyTileSizes;
- LinalgTilingPattern pattern(getContext(), tilingOptions);
SimpleRewriter rewriter(getContext());
rewriter.setInsertionPoint(target);
FailureOr<TiledLinalgOp> result =
- pattern.returningMatchAndRewrite(target, rewriter);
+ tileWithLinalgTilingOptions(rewriter, target, tilingOptions);
if (failed(result))
return DiagnosedSilenceableFailure(reportUnknownTransformError(target));
@@ -1052,7 +1061,6 @@ transform::SplitReductionOp::applyToOne(linalg::LinalgOp target,
DiagnosedSilenceableFailure
transform::TileOp::apply(TransformResults &transformResults,
TransformState &state) {
- LinalgTilingOptions tilingOptions;
SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
ArrayRef<Operation *> targets = state.getPayloadOps(getTarget());
@@ -1097,6 +1105,7 @@ transform::TileOp::apply(TransformResults &transformResults,
return diag;
}
+ LinalgTilingOptions tilingOptions;
unsigned index = en.index();
if (!tileSizes.empty()) {
tilingOptions.setTileSizeComputationFunction(
@@ -1118,10 +1127,9 @@ transform::TileOp::apply(TransformResults &transformResults,
}
tilingOptions.setInterchange(extractUIntArray(getInterchange()));
- LinalgTilingPattern pattern(getContext(), tilingOptions);
SimpleRewriter rewriter(linalgOp.getContext());
FailureOr<TiledLinalgOp> tiledOp =
- pattern.returningMatchAndRewrite(linalgOp, rewriter);
+ tileWithLinalgTilingOptions(rewriter, linalgOp, tilingOptions);
if (failed(tiledOp))
return DiagnosedSilenceableFailure::definiteFailure();
@@ -1340,6 +1348,153 @@ LogicalResult TileToForeachThreadOp::verify() {
return success();
}
+//===----------------------------------------------------------------------===//
+// TileToScfForOp
+//===----------------------------------------------------------------------===//
+
+DiagnosedSilenceableFailure
+transform::TileToScfForOp::apply(TransformResults &transformResults,
+ TransformState &state) {
+ SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
+
+ ArrayRef<Operation *> targets = state.getPayloadOps(getTarget());
+ SmallVector<ArrayRef<Operation *>> dynamicSizeProducers;
+ dynamicSizeProducers.reserve(getDynamicSizes().size());
+ for (Value dynamicSizeProducerHandle : getDynamicSizes()) {
+ dynamicSizeProducers.push_back(
+ state.getPayloadOps(dynamicSizeProducerHandle));
+
+ if (dynamicSizeProducers.back().size() != targets.size()) {
+ DiagnosedSilenceableFailure diag =
+ emitSilenceableError()
+ << "expected as many dynamic size-producing operations ("
+ << dynamicSizeProducers.back().size() << ") as target ops ("
+ << targets.size() << ")";
+ diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle";
+ return diag;
+ }
+
+ for (Operation *op : dynamicSizeProducers.back()) {
+ if (op->getNumResults() == 1 &&
+ op->getResult(0).getType().isa<IndexType>())
+ continue;
+ DiagnosedSilenceableFailure diag =
+ emitSilenceableError() << "expected sizes to be produced by ops "
+ "with a single index-type result";
+ diag.attachNote(op->getLoc()) << "size producer op";
+ diag.attachNote(dynamicSizeProducerHandle.getLoc()) << "for this handle";
+ return diag;
+ }
+ }
+
+ SmallVector<Operation *> tiled;
+ SmallVector<SmallVector<Operation *, 4>, 4> loops;
+ loops.resize(getLoops().size());
+ for (auto &en : llvm::enumerate(targets)) {
+ auto tilingInterfaceOp = dyn_cast<TilingInterface>(en.value());
+ if (!tilingInterfaceOp) {
+ DiagnosedSilenceableFailure diag =
+ emitSilenceableError() << "only TilingInterface ops are supported";
+ diag.attachNote(en.value()->getLoc()) << "target op";
+ return diag;
+ }
+
+ scf::SCFTilingOptions tilingOptions;
+ unsigned index = en.index();
+ if (!tileSizes.empty()) {
+ tilingOptions.setTileSizeComputationFunction(
+ [&, index](OpBuilder &b, Operation *) {
+ SmallVector<Value, 4> sizes;
+ sizes.reserve(tileSizes.size());
+ unsigned dynamicIdx = 0;
+ for (OpFoldResult ofr : getMixedSizes()) {
+ if (auto attr = ofr.dyn_cast<Attribute>()) {
+ sizes.push_back(b.create<arith::ConstantIndexOp>(
+ getLoc(), attr.cast<IntegerAttr>().getInt()));
+ } else {
+ sizes.push_back(
+ dynamicSizeProducers[dynamicIdx++][index]->getResult(0));
+ }
+ }
+ return sizes;
+ });
+ }
+
+ tilingOptions.setInterchange(extractI64Array(getInterchange()));
+ SimpleRewriter rewriter(tilingInterfaceOp.getContext());
+ FailureOr<scf::SCFTilingResult> tilingResult =
+ tileUsingSCFForOp(rewriter, tilingInterfaceOp, tilingOptions);
+ if (failed(tilingResult))
+ return DiagnosedSilenceableFailure::definiteFailure();
+
+ rewriter.replaceOp(tilingInterfaceOp, tilingResult->replacements);
+
+ tiled.push_back(tilingResult->tiledOp);
+ for (const auto &en2 : llvm::enumerate(tilingResult->loops))
+ loops[en2.index()].push_back(en2.value());
+ }
+
+ transformResults.set(getTiledLinalgOp().cast<OpResult>(), tiled);
+ for (const auto &en : llvm::enumerate(loops))
+ transformResults.set(getLoops()[en.index()].cast<OpResult>(), en.value());
+
+ return DiagnosedSilenceableFailure::success();
+}
+
+SmallVector<OpFoldResult> transform::TileToScfForOp::getMixedSizes() {
+ ValueRange dynamic = getDynamicSizes();
+ SmallVector<int64_t> tileSizes = extractFromI64ArrayAttr(getStaticSizes());
+ SmallVector<OpFoldResult> results;
+ results.reserve(tileSizes.size());
+ unsigned dynamicPos = 0;
+ Builder builder(getContext());
+ for (int64_t size : tileSizes) {
+ if (size == ShapedType::kDynamicSize) {
+ results.push_back(dynamic[dynamicPos++]);
+ } else {
+ results.push_back(builder.getIndexAttr(size));
+ }
+ }
+ return results;
+}
+
+ParseResult transform::TileToScfForOp::parse(OpAsmParser &parser,
+ OperationState &result) {
+ OpAsmParser::UnresolvedOperand target;
+ SmallVector<OpAsmParser::UnresolvedOperand> dynamicSizes;
+ ArrayAttr staticSizes;
+ auto pdlOperationType = pdl::OperationType::get(parser.getContext());
+ if (parser.parseOperand(target) ||
+ parser.resolveOperand(target, pdlOperationType, result.operands) ||
+ parseDynamicIndexList(parser, dynamicSizes, staticSizes,
+ ShapedType::kDynamicSize) ||
+ parser.resolveOperands(dynamicSizes, pdlOperationType, result.operands) ||
+ parser.parseOptionalAttrDict(result.attributes))
+ return ParseResult::failure();
+
+ result.addAttribute(getStaticSizesAttrName(result.name), staticSizes);
+ size_t numExpectedLoops =
+ staticSizes.size() - llvm::count(extractFromI64ArrayAttr(staticSizes), 0);
+ result.addTypes(SmallVector<Type>(numExpectedLoops + 1, pdlOperationType));
+ return success();
+}
+
+void TileToScfForOp::print(OpAsmPrinter &p) {
+ p << ' ' << getTarget();
+ printDynamicIndexList(p, getOperation(), getDynamicSizes(), getStaticSizes(),
+ ShapedType::kDynamicSize);
+ p.printOptionalAttrDict((*this)->getAttrs(), {getStaticSizesAttrName()});
+}
+
+void transform::TileToScfForOp::getEffects(
+ SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
+ consumesHandle(getTarget(), effects);
+ onlyReadsHandle(getDynamicSizes(), effects);
+ producesHandle(getTiledLinalgOp(), effects);
+ producesHandle(getLoops(), effects);
+ modifiesPayload(effects);
+}
+
//===----------------------------------------------------------------------===//
// VectorizeOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
index 162e74f1ba31d..39a9c7f49bd03 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -51,40 +51,6 @@ using namespace linalg;
namespace {
-/// Configurable pass to apply pattern-based linalg tiling.
-struct LinalgStrategyTilePass
- : public impl::LinalgStrategyTilePassBase<LinalgStrategyTilePass> {
-
- LinalgStrategyTilePass() = default;
-
- LinalgStrategyTilePass(StringRef opName,
- mlir::linalg::LinalgTilingOptions opt,
- LinalgTransformationFilter filt)
- : options(std::move(opt)), filter(std::move(filt)) {
- this->anchorOpName.setValue(opName.str());
- }
-
- void runOnOperation() override {
- auto funcOp = getOperation();
- if (!anchorFuncName.empty() && funcOp.getName() != anchorFuncName)
- return;
-
- MLIRContext *ctx = funcOp.getContext();
- RewritePatternSet tilingPattern(ctx);
- if (!anchorOpName.empty())
- tilingPattern.add<LinalgTilingPattern>(anchorOpName, ctx, options,
- filter);
- else
- tilingPattern.add<LinalgTilingPattern>(ctx, options, filter);
- if (anchorOpName == tensor::PadOp::getOperationName())
- populatePadTensorTilingPatterns(tilingPattern, options);
- (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern));
- }
-
- mlir::linalg::LinalgTilingOptions options;
- LinalgTransformationFilter filter;
-};
-
/// Configurable pass to lower vector operations.
struct LinalgStrategyRemoveMarkersPass
: public impl::LinalgStrategyRemoveMarkersPassBase<
@@ -101,14 +67,6 @@ struct LinalgStrategyRemoveMarkersPass
};
} // namespace
-/// Create a LinalgStrategyTilePass.
-std::unique_ptr<OperationPass<func::FuncOp>>
-mlir::createLinalgStrategyTilePass(StringRef opName,
- const LinalgTilingOptions &opt,
- const LinalgTransformationFilter &filter) {
- return std::make_unique<LinalgStrategyTilePass>(opName, opt, filter);
-}
-
/// Create a LinalgStrategyRemoveMarkersPass.
std::unique_ptr<OperationPass<func::FuncOp>>
mlir::createLinalgStrategyRemoveMarkersPass() {
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index d3779061060b8..c0ff3e0a55e49 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -732,77 +732,8 @@ void mlir::linalg::populateLinalgTilingCanonicalizationPatterns(
>::insert(patterns);
}
-/// Populate the given list with patterns that apply Linalg tiling.
-static void insertTilingPatterns(RewritePatternSet &patterns,
- const LinalgTilingOptions &options) {
- auto *ctx = patterns.getContext();
- LinalgTransformationFilter f(ArrayRef<StringAttr>{},
- StringAttr::get(ctx, "tiled"));
- TilingPatterns<GenericOp,
-#define GET_OP_LIST
-#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
- >::insert(patterns, options, f);
- patterns.add<PadOpTilingPattern>(ctx, options);
-}
-
void mlir::linalg::populatePadTensorTilingPatterns(
RewritePatternSet &patterns, const LinalgTilingOptions &options) {
auto *ctx = patterns.getContext();
patterns.add<PadOpTilingPattern>(ctx, options);
}
-
-static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) {
- MLIRContext *ctx = funcOp.getContext();
- RewritePatternSet patterns(ctx);
- patterns.add<ExtractSliceOfPadTensorSwapPattern>(patterns.getContext());
- (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
- (void)applyPatternsAndFoldGreedily(
- funcOp, getLinalgTilingCanonicalizationPatterns(ctx));
-}
-
-namespace {
-struct LinalgTilingPass : public impl::LinalgTilingPassBase<LinalgTilingPass> {
- LinalgTilingPass() = default;
- LinalgTilingPass(ArrayRef<int64_t> tileSizes, LinalgTilingLoopType loopType) {
- this->tileSizes = tileSizes;
- this->loopType = "";
- this->loopTypeEnum = loopType;
- }
-
- void runOnOperation() override {
- func::FuncOp funcOp = getOperation();
- LinalgTilingLoopType type =
- llvm::StringSwitch<LinalgTilingLoopType>(loopType)
- .Case("for", LinalgTilingLoopType::Loops)
- .Case("affine", LinalgTilingLoopType::AffineLoops)
- .Case("parallel", LinalgTilingLoopType::ParallelLoops)
- .Default(loopTypeEnum);
- auto options =
- LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(type);
- MLIRContext *ctx = funcOp.getContext();
- RewritePatternSet patterns(ctx);
- insertTilingPatterns(patterns, options);
- scf::populateSCFForLoopCanonicalizationPatterns(patterns);
- (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
- (void)applyPatternsAndFoldGreedily(
- funcOp, getLinalgTilingCanonicalizationPatterns(ctx));
- // Drop the marker.
- funcOp.walk([](LinalgOp op) {
- op->removeAttr(LinalgTransforms::kLinalgTransformMarker);
- });
-
- // Apply swap pattern after generating loop nest and running
- // canonicalizations.
- applyExtractSliceOfPadTensorSwapPattern(funcOp);
- }
-
- LinalgTilingLoopType loopTypeEnum;
-};
-
-} // namespace
-
-std::unique_ptr<OperationPass<func::FuncOp>>
-mlir::createLinalgTilingPass(ArrayRef<int64_t> tileSizes,
- linalg::LinalgTilingLoopType loopType) {
- return std::make_unique<LinalgTilingPass>(tileSizes, loopType);
-}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 938b9e736bf72..58923bc0ac64b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -356,33 +356,13 @@ void mlir::linalg::peelTiledLinalgOp(RewriterBase &rewriter, TiledLinalgOp &res,
}
}
-/// Linalg tiling pattern.
-mlir::linalg::LinalgTilingPattern::LinalgTilingPattern(
- MLIRContext *context, LinalgTilingOptions options,
- LinalgTransformationFilter f, PatternBenefit benefit)
- : OpInterfaceRewritePattern<LinalgOp>(context, benefit),
- filter(std::move(f)), options(std::move(options)) {}
-
-mlir::linalg::LinalgTilingPattern::LinalgTilingPattern(
- StringRef opName, MLIRContext *context, LinalgTilingOptions options,
- LinalgTransformationFilter f, PatternBenefit benefit)
- : OpInterfaceRewritePattern<LinalgOp>(context, benefit),
- filter(f.addOpNameFilter(opName)), options(std::move(options)) {}
-
FailureOr<TiledLinalgOp>
-mlir::linalg::LinalgTilingPattern::returningMatchAndRewrite(
- LinalgOp op, PatternRewriter &rewriter) const {
- if (failed(filter.checkAndNotify(rewriter, op)))
- return failure();
-
+mlir::linalg::tileWithLinalgTilingOptions(RewriterBase &rewriter, LinalgOp op,
+ const LinalgTilingOptions &options) {
FailureOr<TiledLinalgOp> res = tileLinalgOp(rewriter, op, options);
if (failed(res))
return failure();
- // Clear filter to stop recursive pattern application.
- // This must be done here to properly propagate to peeling branches.
- filter.replaceLinalgTransformationFilter(rewriter, res->op);
-
// Peel the loops of the TiledLinalgOp.
peelTiledLinalgOp(rewriter, *res, options.peeledLoops, options.loopType);
diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
deleted file mode 100644
index 6178aa393ee0e..0000000000000
--- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
+++ /dev/null
@@ -1,219 +0,0 @@
-// RUN: mlir-opt %s -test-linalg-transform-patterns=test-tile-and-distribute-options -split-input-file | FileCheck %s
-
-func.func @gemm1(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
-{
- linalg.matmul {__internal_linalg_transform__ = "distribute1"}
- ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
- outs(%c: memref<?x?xf32>)
- return
-}
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)>
-// CHECK: func @gemm1(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK: scf.for %[[ARG3:.*]] =
-// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-func.func @gemm2(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
-{
- linalg.matmul {__internal_linalg_transform__ = "distribute2"}
- ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
- outs(%c:memref<?x?xf32>)
- return
-}
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)>
-// CHECK: func @gemm2(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[INBOUNDSY:.*]] = arith.cmpi slt, %[[ITERY]], %{{.*}}
-// CHECK: %[[INBOUNDSX:.*]] = arith.cmpi slt, %[[ITERX]], %{{.*}}
-// CHECK: %[[INBOUNDS:.*]] = arith.andi %[[INBOUNDSY]], %[[INBOUNDSX]]
-// CHECK: scf.if %[[INBOUNDS]]
-// CHECK: scf.for %[[ARG3:.*]] =
-// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-func.func @gemm3(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
-{
- linalg.matmul {__internal_linalg_transform__ = "distribute3"}
- ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
- outs(%c: memref<?x?xf32>)
- return
-}
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)>
-// CHECK: func @gemm3(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x
-// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
-// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]
-// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]])
-// CHECK: scf.for %[[ARG5:.*]] =
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG5]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG5]], %[[ARG4]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-func.func @gemm4(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
-{
- linalg.matmul {__internal_linalg_transform__ = "distribute4"}
- ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
- outs(%c: memref<?x?xf32>)
- return
-}
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)>
-// CHECK: func @gemm4(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBX]], %{{.*}}
-// CHECK: scf.if %[[INBOUNDS]]
-// CHECK: scf.for %[[ARG3:.*]] =
-// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-func.func @gemm5(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
-{
- linalg.matmul {__internal_linalg_transform__ = "distribute5"}
- ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
- outs(%c: memref<?x?xf32>)
- return
-}
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)>
-// CHECK: func @gemm5(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x
-// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]
-// CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBY]], %{{.*}}
-// CHECK: scf.if %[[INBOUNDS]]
-// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]])
-// CHECK: scf.for %[[ARG4:.*]] =
-// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-func.func @gemm6(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
-{
- linalg.matmul {__internal_linalg_transform__ = "distribute6"}
- ins(%a, %b: memref<?x?xf32>, memref<?x?xf32>)
- outs(%c: memref<?x?xf32>)
- return
-}
-// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 * 8)>
-// CHECK: func @gemm6(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
-// CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]])
-// CHECK: scf.for %[[ARG4:.*]] =
-// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
-// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
-// CHECK: func @matmul_tensors(
-// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
-// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<?x?xf32>
-// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
-func.func @matmul_tensors(
- %arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
- -> tensor<?x?xf32> {
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
-// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y
-// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
-// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x
-// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]]
-// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
-// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]]
-// CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]]
-// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
-// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
-// CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<?x?xf32> into tensor<?x?xf32>
-// CHECK: scf.yield %[[TD]] : tensor<?x?xf32>
-// CHECK: scf.yield %[[TD2]] : tensor<?x?xf32>
-// CHECK: scf.yield %[[TD1]] : tensor<?x?xf32>
- %0 = linalg.matmul {__internal_linalg_transform__ = "tensors_distribute1"}
- ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
- outs(%arg2: tensor<?x?xf32>)
- -> tensor<?x?xf32>
-
-// CHECK: return %[[TD0]] : tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
diff --git a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir
deleted file mode 100644
index f8f102e48e0b9..0000000000000
--- a/mlir/test/Dialect/Linalg/tile-and-peel-tensors.mlir
+++ /dev/null
@@ -1,110 +0,0 @@
-// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=0" -canonicalize | \
-// RUN: FileCheck %s -check-prefix=CHECK-PEEL-0
-
-// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-pattern tile-sizes=256,128,512 peeled-loops=1,2" -canonicalize | \
-// RUN: FileCheck %s -check-prefix=CHECK-PEEL-12
-
-// CHECK-PEEL-0: func @matmul_static_tensor
-// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index
-// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index
-// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index
-// CHECK-PEEL-0-DAG: %[[c1280:.*]] = arith.constant 1280 : index
-// CHECK-PEEL-0-DAG: %[[c1600:.*]] = arith.constant 1600 : index
-// CHECK-PEEL-0-DAG: %[[c1700:.*]] = arith.constant 1700 : index
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1280]] step %[[c256]] {{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} {
-// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor<?x?xf32>) outs({{.*}} : tensor<256x?xf32>)
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1700]] step %[[c128]] {{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} {
-// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<220x?xf32>, tensor<?x?xf32>) outs({{.*}} : tensor<220x?xf32>)
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-
-// CHECK-PEEL-12: func @matmul_static_tensor
-// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index
-// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index
-// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index
-// CHECK-PEEL-12-DAG: %[[c1500:.*]] = arith.constant 1500 : index
-// CHECK-PEEL-12-DAG: %[[c1536:.*]] = arith.constant 1536 : index
-// CHECK-PEEL-12-DAG: %[[c1600:.*]] = arith.constant 1600 : index
-// CHECK-PEEL-12-DAG: %[[c1664:.*]] = arith.constant 1664 : index
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1500]] step %[[c256]] {{.*}} {
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1664]] step %[[c128]] {{.*}} {
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1536]] step %[[c512]] {{.*}} {
-// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor<?x512xf32>, tensor<512x128xf32>) outs({{.*}} : tensor<?x128xf32>)
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor<?x64xf32>, tensor<64x128xf32>) outs({{.*}} : tensor<?x128xf32>)
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %[[c1600]] step %[[c512]] {{.*}} {
-// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor<?x?xf32>, tensor<?x36xf32>) outs({{.*}} : tensor<?x36xf32>)
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: }
-func.func @matmul_static_tensor(%arg0: tensor<1500x1600xf32>, %arg1: tensor<1600x1700xf32>)
- -> tensor<1500x1700xf32> {
- %out = tensor.empty() : tensor<1500x1700xf32>
- %r = linalg.matmul {__internal_linalg_transform__ = "tile"}
- ins(%arg0, %arg1: tensor<1500x1600xf32>, tensor<1600x1700xf32>)
- outs(%out: tensor<1500x1700xf32>) -> tensor<1500x1700xf32>
- return %r : tensor<1500x1700xf32>
-}
-
-// -----
-
-// CHECK-PEEL-0: func @matmul_dynamic_tensor
-// CHECK-PEEL-0-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK-PEEL-0-DAG: %[[c128:.*]] = arith.constant 128 : index
-// CHECK-PEEL-0-DAG: %[[c256:.*]] = arith.constant 256 : index
-// CHECK-PEEL-0-DAG: %[[c512:.*]] = arith.constant 512 : index
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} {
-// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<256x?xf32>, tensor<?x?xf32>) outs({{.*}} : tensor<256x?xf32>)
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: scf.for %{{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} {
-// CHECK-PEEL-0: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} {
-// CHECK-PEEL-0: linalg.matmul ins({{.*}} : tensor<?x?xf32>, tensor<?x?xf32>) outs({{.*}} : tensor<?x?xf32>)
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-// CHECK-PEEL-0: }
-
-// CHECK-PEEL-12: func @matmul_dynamic_tensor
-// CHECK-PEEL-12-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK-PEEL-12-DAG: %[[c128:.*]] = arith.constant 128 : index
-// CHECK-PEEL-12-DAG: %[[c256:.*]] = arith.constant 256 : index
-// CHECK-PEEL-12-DAG: %[[c512:.*]] = arith.constant 512 : index
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c256]] {{.*}} {
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c128]] {{.*}} {
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} {
-// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor<?x512xf32>, tensor<512x128xf32>) outs({{.*}} : tensor<?x128xf32>)
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: scf.for %{{.*}} {
-// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor<?x?xf32>, tensor<?x128xf32>) outs({{.*}} : tensor<?x128xf32>)
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: scf.for %{{.*}} {
-// CHECK-PEEL-12: scf.for %{{.*}} = %[[c0]] to %{{.*}} step %[[c512]] {{.*}} {
-// CHECK-PEEL-12: linalg.matmul ins({{.*}} : tensor<?x?xf32>, tensor<?x?xf32>) outs({{.*}} : tensor<?x?xf32>)
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: }
-// CHECK-PEEL-12: }
-func.func @matmul_dynamic_tensor(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>)
- -> tensor<?x?xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
- %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
- %out = tensor.empty(%d0, %d1) : tensor<?x?xf32>
- %r = linalg.matmul {__internal_linalg_transform__ = "tile"}
- ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x?xf32>)
- outs(%out: tensor<?x?xf32>) -> tensor<?x?xf32>
- return %r : tensor<?x?xf32>
-}
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
index 028c93a4b1967..f8b1064b54ff1 100644
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" | FileCheck %s
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize | FileCheck %s
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 1)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 - 1, s1 + 2)>
@@ -10,6 +10,12 @@ func.func @conv(%arg0 : memref<?x?xf32>, %arg1 : memref<?x?xf32>, %arg2 : memref
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1
+ %1, %loop:2 = transform.structured.tile %0 [2, 3]
+}
+
// CHECK: func @conv
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
index fdca6fbdeec9c..d6e9c0e2a2675 100644
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -1,6 +1,4 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=10,25" -split-input-file | FileCheck %s -check-prefix=TILE-10n25
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=25,0" -split-input-file | FileCheck %s -check-prefix=TILE-25n0
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,25" -split-input-file | FileCheck %s -check-prefix=TILE-0n25
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -split-input-file | FileCheck %s -check-prefix=TILE-10n25
func.func @indexed_vector(%arg0: memref<50xindex>) {
linalg.generic {indexing_maps = [affine_map<(i) -> (i)>],
@@ -12,6 +10,13 @@ func.func @indexed_vector(%arg0: memref<50xindex>) {
}
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
+ %1, %loop:2 = transform.structured.tile %0 [10, 25]
+}
+
// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
// TILE-10n25-LABEL: func @indexed_vector
// TILE-10n25: %[[C10:.*]] = arith.constant 10 : index
@@ -21,19 +26,6 @@ func.func @indexed_vector(%arg0: memref<50xindex>) {
// TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]])
// TILE-10n25: linalg.yield %[[NEW_I]] : index
-// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
-// TILE-25n0-LABEL: func @indexed_vector
-// TILE-25n0: %[[C25:.*]] = arith.constant 25 : index
-// TILE-25n0: scf.for %[[J:.*]] = {{.*}} step %[[C25]]
-// TILE-25n0: linalg.generic
-// TILE-25n0: %[[I:.*]] = linalg.index 0 : index
-// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[J]])
-// TILE-25n0: linalg.yield %[[NEW_I]] : index
-
-// TILE-0n25-LABEL: func @indexed_vector
-// TILE-0n25-NOT: scf.for %[[J:.*]] = {{.*}} step %
-// TILE-0n25: linalg.generic
-
// -----
func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
@@ -48,6 +40,13 @@ func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
}
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
+ %1, %loop:2 = transform.structured.tile %0 [10, 25]
+}
+
// TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
// TILE-10n25-LABEL: func @indexed_matrix
// TILE-10n25-DAG: %[[C25:.*]] = arith.constant 25 : index
@@ -61,25 +60,3 @@ func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
// TILE-10n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]])
// TILE-10n25: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[NEW_J]] : index
// TILE-10n25: linalg.yield %[[SUM]] : index
-
-// TILE-25n0-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
-// TILE-25n0-LABEL: func @indexed_matrix
-// TILE-25n0: %[[C25:.*]] = arith.constant 25 : index
-// TILE-25n0: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
-// TILE-25n0: linalg.generic
-// TILE-25n0: %[[I:.*]] = linalg.index 0 : index
-// TILE-25n0: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[I]], %[[L]])
-// TILE-25n0: %[[J:.*]] = linalg.index 1 : index
-// TILE-25n0: %[[SUM:.*]] = arith.addi %[[NEW_I]], %[[J]] : index
-// TILE-25n0: linalg.yield %[[SUM]] : index
-
-// TILE-0n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0 + d1)>
-// TILE-0n25-LABEL: func @indexed_matrix
-// TILE-0n25: %[[C25:.*]] = arith.constant 25 : index
-// TILE-0n25: scf.for %[[L:.*]] = {{.*}} step %[[C25]]
-// TILE-0n25: linalg.generic
-// TILE-0n25: %[[I:.*]] = linalg.index 0 : index
-// TILE-0n25: %[[J:.*]] = linalg.index 1 : index
-// TILE-0n25: %[[NEW_J:.*]] = affine.apply [[$MAP]](%[[J]], %[[L]])
-// TILE-0n25: %[[SUM:.*]] = arith.addi %[[I]], %[[NEW_J]] : index
-// TILE-0n25: linalg.yield %[[SUM]] : index
diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
index 6295f9106cf11..74e8ebb250bd8 100644
--- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -1,53 +1,65 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3" -cse -split-input-file | \
-// RUN: FileCheck %s -check-prefix=TILE2
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,3" -resolve-shaped-type-result-dims -cse -split-input-file | \
-// RUN: FileCheck %s -check-prefix=TILE1
-// This test only checks that tiling does not crash.
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -resolve-shaped-type-result-dims -cse -split-input-file
-
-// TILE2-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
-// TILE2-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
-// TILE2: func @dynamic_pad_tensor(
-// TILE2-SAME: %[[IN:.*]]: tensor<?x?xf32>
-// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE2-DAG: %[[C1:.*]] = arith.constant 1 : index
-// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE2: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
-// TILE2: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]]
-// TILE2: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
-// TILE2: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
-// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
-// TILE2: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE2: %[[SWAP_RESULT:.*]] = scf.if
-// TILE2: tensor.generate
-// TILE2: else
-// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: return %[[RESULT]]
-
-// TILE1-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)>
-// TILE1-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
-// TILE1: func @dynamic_pad_tensor(
-// TILE1-SAME: %[[IN:.*]]: tensor<?x?xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C1:.*]] = arith.constant 1 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
-// TILE1: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]]
-// TILE1: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
-// TILE1: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
-// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[SWAP_RESULT:.*]] = scf.if
-// TILE1: tensor.generate
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
-// TILE1: return %[[RESULT]]
-
-func.func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -canonicalize -cse -split-input-file | FileCheck %s
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// CHECK: func @dynamic_pad_tensor_3_4(
+// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN0]]]
+// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
+// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor<?x?xf32>,
+ %pad_value: f32) -> tensor<?x?xf32> {
+ %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<?x?xf32> to tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3]
+}
+
+// -----
+
+// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 7)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
+// CHECK: func @dynamic_pad_tensor_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<?x?xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[DIM_IN1:.*]] = tensor.dim %[[IN]], %[[C1]]
+// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP0]]()[%[[DIM_IN1]]]
+// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
+// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor<?x?xf32>,
%pad_value: f32) -> tensor<?x?xf32> {
%0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
^bb0(%arg1: index, %arg2: index):
@@ -56,41 +68,64 @@ func.func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
return %0 : tensor<?x?xf32>
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
+
+// -----
+
+// CHECK-LABEL: func @static_pad_tensor_3_4(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
+// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
+// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
+ %pad_value: f32) -> tensor<15x16xf32> {
+ %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
+ ^bb0(%arg1: index, %arg2: index):
+ tensor.yield %pad_value : f32
+ } : tensor<7x9xf32> to tensor<15x16xf32>
+ return %0 : tensor<15x16xf32>
+}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ %1, %loops:2 = transform.structured.tile_to_scf_for %0 [2, 3]
+}
+
// -----
-// TILE2-LABEL: func @static_pad_tensor(
-// TILE2-SAME: %[[IN:.*]]: tensor<7x9xf32>
-// TILE2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE2-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE2-DAG: %[[C15:.*]] = arith.constant 15 : index
-// TILE2-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE2: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
-// TILE2: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE2: %[[SWAP_RESULT:.*]] = scf.if
-// TILE2: tensor.generate
-// TILE2: else
-// TILE2: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// TILE2: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// TILE2: return %[[RESULT]]
-
-
-// TILE1-LABEL: func @static_pad_tensor(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE1: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[SWAP_RESULT:.*]] = scf.if
-// TILE1: tensor.generate
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// TILE1: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
-// TILE1: return %[[RESULT]]
-
-func.func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
+// CHECK-LABEL: func @static_pad_tensor_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
+// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[SWAP_RESULT:.*]] = scf.if
+// CHECK: tensor.generate
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: %[[CAST_SWAP_RESULT:.*]] = tensor.cast %[[SWAP_RESULT]] : tensor<?x?xf32> to tensor<15x?xf32>
+// CHECK: tensor.insert_slice %[[CAST_SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: return %[[RESULT]]
+
+func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
%pad_value: f32) -> tensor<15x16xf32> {
%0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
^bb0(%arg1: index, %arg2: index):
@@ -99,25 +134,35 @@ func.func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
return %0 : tensor<15x16xf32>
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
+
// -----
-// TILE1-LABEL: func @static_pad_tile_evenly(
-// TILE1-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
-// TILE1-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE1-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE1-DAG: %[[C15:.*]] = arith.constant 15 : index
-// TILE1: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// TILE1: %[[R2:.*]] = scf.if
-// TILE1: %[[GEN:.*]] = tensor.generate
-// TILE1: scf.yield %[[GEN]] : tensor<14x3xf32>
-// TILE1: else
-// TILE1: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// TILE1: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
-// TILE1: scf.yield %[[PAD]] : tensor<14x3xf32>
-// TILE1: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
-// TILE1: scf.yield %[[R3]] : tensor<14x15xf32>
-// TILE1: return %[[RESULT]] : tensor<14x15xf32>
-func.func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
+// CHECK-LABEL: func @static_pad_tile_evenly_0_3(
+// CHECK-SAME: %[[IN:.*]]: tensor<7x9xf32>, %[[OUT:.*]]: tensor<14x15xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
+// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
+// CHECK: %[[R2:.*]] = scf.if
+// CHECK: %[[GEN:.*]] = tensor.generate
+// CHECK: %[[cast_0:.*]] = tensor.cast %[[GEN]] : tensor<14x3xf32> to tensor<?x?xf32>
+// CHECK: scf.yield %[[cast_0]] : tensor<?x?xf32>
+// CHECK: else
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// CHECK: %[[cast_1:.*]] = tensor.cast %[[PAD]] : tensor<14x?xf32> to tensor<?x?xf32>
+// CHECK: scf.yield %[[cast_1]] : tensor<?x?xf32>
+// CHECK: %[[cast:.*]] = tensor.cast %[[R2]] : tensor<?x?xf32> to tensor<14x3xf32>
+// CHECK: %[[R3:.*]] = tensor.insert_slice %[[cast]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
+// CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
+// CHECK: return %[[RESULT]] : tensor<14x15xf32>
+
+func.func @static_pad_tile_evenly_0_3(%input_tensor: tensor<7x9xf32>,
%output_tensor: tensor<14x15xf32>,
%pad_value: f32) -> tensor<14x15xf32> {
%0 = tensor.pad %input_tensor low[0, 0] high[7, 6] {
@@ -126,3 +171,9 @@ func.func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
} : tensor<7x9xf32> to tensor<14x15xf32>
return %0 : tensor<14x15xf32>
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ %1, %loop = transform.structured.tile_to_scf_for %0 [0, 3]
+}
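The pad schedules above use `transform.structured.tile_to_scf_for` rather than `transform.structured.tile`, and the result arity follows the same rule: sizes [2, 3] yield `%loops:2`, while [0, 3] yields a single `%loop`. As a sketch, tiling only the first pad dimension would be written as follows (the [2, 0] sizes are illustrative, not taken from the test):

    transform.sequence failures(propagate) {
      ^bb0(%arg1: !pdl.operation):
        %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
        // One non-zero tile size, hence one scf.for loop handle.
        %1, %loop = transform.structured.tile_to_scf_for %0 [2, 0]
    }
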
diff --git a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir b/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir
deleted file mode 100644
index dcad7a046c399..0000000000000
--- a/mlir/test/Dialect/Linalg/tile-parallel-reduce.mlir
+++ /dev/null
@@ -1,113 +0,0 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4,8 loop-type=parallel" -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE1
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4 loop-type=parallel" -split-input-file | FileCheck %s -check-prefix=TILE2
-
-func.func @gemm(%arg0 : memref<?x?xf32>,
- %arg1 : memref<?x?xf32>,
- %arg2 : memref<?x?xf32>)
-{
- linalg.matmul ins(%arg0, %arg1: memref<?x?xf32>, memref<?x?xf32>)
- outs(%arg2: memref<?x?xf32>)
- return
-}
-// CHECK-LABEL: func @gemm
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) =
-// CHECK-SAME: step (%[[C2]], %[[C4]])
-// CHECK: scf.for %[[ARG5:.*]] =
-// CHECK-SAME: step %[[C8]]
-// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]]
-// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG5]], %[[ARG4]]]
-// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]]
-// CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// TILE1-LABEL: func @gemm
-// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE1: scf.parallel (%[[ARG3:.*]]) =
-// TILE1-SAME: step (%[[C2]])
-// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0]
-// TILE1: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0]
-// TILE1-NOT: memref.subview
-// TILE1: linalg.matmul ins(%[[SV1]], %{{.*}} outs(%[[SV3]]
-
-// TILE2-LABEL: func @gemm
-// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index
-// TILE2: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) =
-// TILE2-SAME: step (%[[C2]], %[[C4]])
-// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0]
-// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[0, %[[ARG4]]]
-// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]]]
-// TILE2: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
-
-// -----
-
-#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
-#map2 = affine_map<(d0, d1, d2) -> (d1)>
-#accesses = [#map0, #map1, #map2]
-#trait = {
- args_in = 2 : i64,
- args_out = 1 : i64,
- iterator_types = ["reduction", "parallel", "reduction"],
- indexing_maps = #accesses
-}
-
-func.func @reduction(%arg0 : memref<?x?x?xf32>,
- %arg1 : memref<?x?xf32>,
- %arg2 : memref<?xf32>)
-{
- linalg.generic #trait
- ins(%arg0, %arg1 : memref<?x?x?xf32>, memref<?x?xf32>)
- outs(%arg2 : memref<?xf32>) {
- ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
- %0 = arith.addf %arg3, %arg4 : f32
- %1 = arith.addf %0, %arg5 : f32
- linalg.yield %1 : f32
- }
- return
-}
-
-// CHECK-LABEL: func @reduction
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK: scf.for %[[ARG3:.*]] =
-// CHECK-SAME: step %[[C2]]
-// CHECK: scf.parallel (%[[ARG4:.*]]) =
-// CHECK-SAME: step (%[[C4]])
-// CHECK: scf.for %[[ARG5:.*]] =
-// CHECK-SAME: step %[[C8]]
-// CHECK: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], %[[ARG5]]]
-// CHECK: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG5]]]
-// CHECK: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]]
-// CHECK: linalg.generic
-// CHECK-SAME: ins(%[[SV1]], %[[SV2]]
-// CHECK-SAME: outs(%[[SV3]]
-
-// TILE1-LABEL: func @reduction
-// TILE1-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE1: scf.for %[[ARG3:.*]] =
-// TILE1-SAME: step %[[C2]]
-// TILE1: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0, 0]
-// TILE1: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0]
-// TILE1-NOT: memref.subview
-// TILE1: linalg.generic
-// TILE1-SAME: ins(%[[SV1]], %[[SV2]]
-// TILE1-SAME: outs(%{{.*}}
-
-// TILE2-LABEL: func @reduction
-// TILE2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE2-DAG: %[[C4:.*]] = arith.constant 4 : index
-// TILE2: scf.for %[[ARG3:.*]] =
-// TILE2-SAME: step %[[C2]]
-// TILE2: scf.parallel (%[[ARG4:.*]]) =
-// TILE2-SAME: step (%[[C4]])
-// TILE2: %[[SV1:.*]] = memref.subview %{{.*}}[%[[ARG3]], %[[ARG4]], 0]
-// TILE2: %[[SV2:.*]] = memref.subview %{{.*}}[%[[ARG3]], 0]
-// TILE2: %[[SV3:.*]] = memref.subview %{{.*}}[%[[ARG4]]]
-// TILE2: linalg.generic
-// TILE2-SAME: ins(%[[SV1]], %[[SV2]]
-// TILE2-SAME: outs(%[[SV3]]
diff --git a/mlir/test/Dialect/Linalg/tile-parallel.mlir b/mlir/test/Dialect/Linalg/tile-parallel.mlir
deleted file mode 100644
index cf346f938be32..0000000000000
--- a/mlir/test/Dialect/Linalg/tile-parallel.mlir
+++ /dev/null
@@ -1,68 +0,0 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-2
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-02
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2 loop-type=parallel" | FileCheck %s -check-prefix=TILE-002
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4 loop-type=parallel" | FileCheck %s -check-prefix=TILE-234
-
-#id_2d = affine_map<(i, j) -> (i, j)>
-#pointwise_2d_trait = {
- args_in = 2,
- args_out = 1,
- indexing_maps = [#id_2d, #id_2d, #id_2d],
- iterator_types = ["parallel", "parallel"]
-}
-
-func.func @sum(%lhs: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- %rhs: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- %sum: memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- linalg.generic #pointwise_2d_trait
- ins(%lhs, %rhs: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- memref<?x?xf32, strided<[?, 1], offset: ?>>)
- outs(%sum : memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
- %result = arith.addf %lhs_in, %rhs_in : f32
- linalg.yield %result : f32
- }
- return
-}
-// TILE-2-LABEL: func @sum(
-// TILE-2-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) {
-// TILE-2-DAG: [[C0:%.*]] = arith.constant 0 : index
-// TILE-2-DAG: [[C2:%.*]] = arith.constant 2 : index
-// TILE-2: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0
-// TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) {
-// TILE-2-NO: scf.parallel
-// TILE-2: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]]
-// TILE-2: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]]
-// TILE-2: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]]
-// TILE-2: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]]
-
-// TILE-02-LABEL: func @sum(
-// TILE-02-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) {
-// TILE-02-DAG: [[C0:%.*]] = arith.constant 0 : index
-// TILE-02-DAG: [[C2:%.*]] = arith.constant 2 : index
-// TILE-02: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1
-// TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) {
-// TILE-02-NO: scf.parallel
-// TILE-02: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]]
-// TILE-02: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]]
-// TILE-02: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]]
-// TILE-02: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]]
-
-// TILE-002-LABEL: func @sum(
-// TILE-002-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) {
-// TILE-002-NO: scf.parallel
-// TILE-002: linalg.generic {{.*}} ins([[LHS]], [[RHS]]{{.*}} outs([[SUM]]
-
-// TILE-234-LABEL: func @sum(
-// TILE-234-SAME: [[LHS:%.*]]: memref{{.*}}, [[RHS:%.*]]: memref{{.*}}, [[SUM:%.*]]: memref{{.*}}) {
-// TILE-234-DAG: [[C0:%.*]] = arith.constant 0 : index
-// TILE-234-DAG: [[C2:%.*]] = arith.constant 2 : index
-// TILE-234-DAG: [[C3:%.*]] = arith.constant 3 : index
-// TILE-234: [[LHS_ROWS:%.*]] = memref.dim [[LHS]], %c0
-// TILE-234: [[LHS_COLS:%.*]] = memref.dim [[LHS]], %c1
-// TILE-234: scf.parallel ([[I:%.*]], [[J:%.*]]) = ([[C0]], [[C0]]) to ([[LHS_ROWS]], [[LHS_COLS]]) step ([[C2]], [[C3]]) {
-// TILE-234-NO: scf.parallel
-// TILE-234: [[LHS_SUBVIEW:%.*]] = memref.subview [[LHS]]
-// TILE-234: [[RHS_SUBVIEW:%.*]] = memref.subview [[RHS]]
-// TILE-234: [[SUM_SUBVIEW:%.*]] = memref.subview [[SUM]]
-// TILE-234: linalg.generic {{.*}} ins([[LHS_SUBVIEW]], [[RHS_SUBVIEW]]{{.*}} outs([[SUM_SUBVIEW]]
diff --git a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir b/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
deleted file mode 100644
index 9697adf70e086..0000000000000
--- a/mlir/test/Dialect/Linalg/tile-scalarize-dynamic-dims.mlir
+++ /dev/null
@@ -1,74 +0,0 @@
-// RUN: mlir-opt %s -test-linalg-transform-patterns="test-tile-scalarize-dynamic-dims" -scf-for-loop-canonicalization -canonicalize -split-input-file | \
-// RUN: FileCheck %s
-
-// CHECK-LABEL: func @matmul_partly_dynamic_tensor(
-// CHECK-SAME: %[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x2000xf32>
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK: tensor.dim %[[ARG0]], %[[C0]] : tensor<?x?xf32>
-// CHECK: %[[UB1:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor<?x?xf32>
-// CHECK: %[[UB2:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor<?x?xf32>
-// CHECK: scf.for %[[IV0:.*]] = %[[C0]] to %[[UB1]] step %[[C1]]
-// CHECK: scf.for %[[IV1:.*]] = %[[C0]] to %[[UB2]] step %[[C1]]
-// CHECK: %[[S1:.*]] = tensor.extract_slice %[[ARG0]][%[[IV0]], %[[IV1]]] [1, 1] [1, 1] : tensor<?x?xf32> to tensor<1x1xf32>
-// CHECK: %[[S2:.*]] = tensor.extract_slice %[[ARG1]][%[[IV1]], 0] [1, 2000] [1, 1] : tensor<?x2000xf32> to tensor<1x2000xf32>
-// CHECK: %[[S3:.*]] = tensor.extract_slice %{{.*}}[%[[IV0]], 0] [1, 2000] [1, 1] : tensor<?x2000xf32> to tensor<1x2000xf32>
-// CHECK: linalg.matmul ins(%[[S1]], %[[S2]] : tensor<1x1xf32>, tensor<1x2000xf32>) outs(%[[S3]] : tensor<1x2000xf32>) -> tensor<1x2000xf32>
-func.func @matmul_partly_dynamic_tensor(%arg0: tensor<?x?xf32>, %arg1: tensor<?x2000xf32>)
- -> tensor<?x2000xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %d0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
- %out = tensor.empty(%d0) : tensor<?x2000xf32>
- %r = linalg.matmul {__internal_linalg_transform__ = "tile"}
- ins(%arg0, %arg1: tensor<?x?xf32>, tensor<?x2000xf32>)
- outs(%out: tensor<?x2000xf32>) -> tensor<?x2000xf32>
- return %r : tensor<?x2000xf32>
-}
-
-// -----
-
-// The input IR of this test case is a tiled and peeled linalg.matmul op.
-
-// CHECK-LABEL: func @tiled_and_peeled_matmul(
-// CHECK: linalg.matmul ins({{.*}} : tensor<32x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<32x258xf32>) -> tensor<32x258xf32>
-// CHECK: linalg.matmul ins({{.*}} : tensor<1x259xf32>, tensor<259x258xf32>) outs({{.*}} : tensor<1x258xf32>) -> tensor<1x258xf32>
-#map0 = affine_map<(d0) -> (64, -d0 + 257)>
-#map1 = affine_map<()[s0] -> ((s0 floordiv 32) * 32)>
-#map2 = affine_map<(d0)[s0] -> (d0 - (s0 floordiv 32) * 32)>
-
-func.func @tiled_and_peeled_matmul(%arg0: tensor<257x259xf32>, %arg1: tensor<259x258xf32>, %arg2: tensor<257x258xf32>) -> tensor<257x258xf32> {
- %c257 = arith.constant 257 : index
- %c64 = arith.constant 64 : index
- %cst = arith.constant 0.000000e+00 : f32
- %c0 = arith.constant 0 : index
- %c32 = arith.constant 32 : index
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<257x258xf32>) -> tensor<257x258xf32>
- %1 = scf.for %arg3 = %c0 to %c257 step %c64 iter_args(%arg4 = %0) -> (tensor<257x258xf32>) {
- %2 = affine.min #map0(%arg3)
- %3 = tensor.extract_slice %arg0[%arg3, 0] [%2, 259] [1, 1] : tensor<257x259xf32> to tensor<?x259xf32>
- %4 = tensor.extract_slice %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<257x258xf32> to tensor<?x258xf32>
- %5 = affine.apply #map1()[%2]
- %6 = scf.for %arg5 = %c0 to %5 step %c32 iter_args(%arg6 = %4) -> (tensor<?x258xf32>) {
- %10 = tensor.extract_slice %3[%arg5, 0] [32, 259] [1, 1] : tensor<?x259xf32> to tensor<32x259xf32>
- %11 = tensor.extract_slice %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<?x258xf32> to tensor<32x258xf32>
- %12 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%10, %arg1 : tensor<32x259xf32>, tensor<259x258xf32>) outs(%11 : tensor<32x258xf32>) -> tensor<32x258xf32>
- %13 = tensor.insert_slice %12 into %arg6[%arg5, 0] [32, 258] [1, 1] : tensor<32x258xf32> into tensor<?x258xf32>
- scf.yield %13 : tensor<?x258xf32>
- }
- %7 = arith.cmpi slt, %5, %2 : index
- %8 = scf.if %7 -> (tensor<?x258xf32>) {
- %10 = affine.apply #map2(%2)[%2]
- %11 = tensor.extract_slice %3[%5, 0] [%10, 259] [1, 1] : tensor<?x259xf32> to tensor<?x259xf32>
- %12 = tensor.extract_slice %6[%5, 0] [%10, 258] [1, 1] : tensor<?x258xf32> to tensor<?x258xf32>
- %13 = linalg.matmul {__internal_linalg_transform__ = "tile"} ins(%11, %arg1 : tensor<?x259xf32>, tensor<259x258xf32>) outs(%12 : tensor<?x258xf32>) -> tensor<?x258xf32>
- %14 = tensor.insert_slice %13 into %6[%5, 0] [%10, 258] [1, 1] : tensor<?x258xf32> into tensor<?x258xf32>
- scf.yield %14 : tensor<?x258xf32>
- } else {
- scf.yield %6 : tensor<?x258xf32>
- }
- %9 = tensor.insert_slice %8 into %arg4[%arg3, 0] [%2, 258] [1, 1] : tensor<?x258xf32> into tensor<257x258xf32>
- scf.yield %9 : tensor<257x258xf32>
- }
- return %1 : tensor<257x258xf32>
-}
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
index 736a0e9524567..b87d72866a2fd 100644
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file | FileCheck %s
// CHECK-LABEL: func @matmul_tensors(
// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
@@ -27,6 +27,12 @@ func.func @matmul_tensors(
return %0 : tensor<?x?xf32>
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [2, 3, 4]
+}
+
// -----
func.func @generic_op_tensors(
@@ -52,6 +58,12 @@ func.func @generic_op_tensors(
return %4 : tensor<?x?x?xf32>
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [2, 3, 4]
+}
+
// CHECK-LABEL: func @generic_op_tensors
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
@@ -117,3 +129,8 @@ func.func @fold_extract_slice(
return %2 : tensor<?x42xf32>
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [2, 3, 4]
+}
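Since `-split-input-file` treats each `// -----` section as an independent module, every split carries its own `transform.sequence`, which `-test-transform-dialect-interpreter` applies to the enclosing module. A self-contained split therefore pairs a payload function with its schedule, along these lines (`@example` is a hypothetical payload, not one of the tests above):

    // -----
    func.func @example(%a: tensor<?x?xf32>, %b: tensor<?x?xf32>,
                       %c: tensor<?x?xf32>) -> tensor<?x?xf32> {
      %0 = linalg.matmul ins(%a, %b : tensor<?x?xf32>, tensor<?x?xf32>)
                         outs(%c : tensor<?x?xf32>) -> tensor<?x?xf32>
      return %0 : tensor<?x?xf32>
    }

    transform.sequence failures(propagate) {
      ^bb0(%arg1: !pdl.operation):
        %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
        %1, %loops:3 = transform.structured.tile %0 [2, 3, 4]
    }
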
diff --git a/mlir/test/Dialect/Linalg/tile-zero.mlir b/mlir/test/Dialect/Linalg/tile-zero.mlir
deleted file mode 100644
index 147b7c7d377d6..0000000000000
--- a/mlir/test/Dialect/Linalg/tile-zero.mlir
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: mlir-opt -test-linalg-transform-patterns=test-tile-pattern %s | FileCheck %s
-
-func.func @matmul_zero_tile(
- %arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = linalg.matmul {__internal_linalg_transform__ = "tile"}
- ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
- outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-// CHECK-LABEL: matmul_zero_tile
-// CHECK: linalg.matmul
-// CHECK-NOT: __internal_linalg_transform__
diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
deleted file mode 100644
index 0fc2ca6efbbc1..0000000000000
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ /dev/null
@@ -1,331 +0,0 @@
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-2
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-02
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-002
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-234
-
-// TILE-2-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// TILE-02-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// TILE-002-DAG: #[[$bound_map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// TILE-234-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-// TILE-234-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 3)>
-// TILE-234-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 4)>
-
-func.func @matmul(%arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- %arg1: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- %arg2: memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- linalg.matmul
- ins(%arg0, %arg1: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- memref<?x?xf32, strided<[?, 1], offset: ?>>)
- outs(%arg2: memref<?x?xf32, strided<[?, 1], offset: ?>>)
- return
-}
-// TILE-2-LABEL: func @matmul(
-// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]]
-
-// TILE-02-LABEL: func @matmul(
-// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-02: %[[N:.*]] = memref.dim %arg1, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
-// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
-
-// TILE-002-LABEL: func @matmul(
-// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-002: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[szK_1:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK_1]], %[[N]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
-
-// TILE-234-LABEL: func @matmul(
-// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index
-// TILE-234: %[[ubM:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[ubN:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szK_1:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK_1]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM_1]], %[[szN_1]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-//
-// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
-
-// When the buffer shapes are known at compile time, it is possible to avoid
-// the "min" in subview size computation. This test uses buffer sizes divisible
-// by respective tile sizes (M=10 divisible by 2, N=12 divisible by 2 and 3,
-// K=16 divisible by 2 and 4).
-func.func @matmul_static(%arg0: memref<10x16xf32, strided<[?, 1], offset: ?>>,
- %arg1: memref<16x12xf32, strided<[?, 1], offset: ?>>,
- %arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>) {
- linalg.matmul
- ins(%arg0, %arg1: memref<10x16xf32, strided<[?, 1], offset: ?>>,
- memref<16x12xf32, strided<[?, 1], offset: ?>>)
- outs(%arg2: memref<10x12xf32, strided<[?, 1], offset: ?>>)
- return
-}
-// TILE-2-LABEL: func @matmul_static(
-// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
-// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
-// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
-// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-2-DAG: %[[M:.*]] = arith.constant 10 : index
-// TILE-2: scf.for %[[I:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 16] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x16xf32, strided<[?, 1], offset: ?>>
-// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [2, 12] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>>
-// TILE-2: linalg.matmul ins(%[[sAi]], %{{.*}}{{.*}} outs(%[[sCi]]
-
-// TILE-02-LABEL: func @matmul_static(
-// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-02-DAG: %[[N:.*]] = arith.constant 12 : index
-// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [16, 2] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<16x2xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [10, 2] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>>
-// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
-
-// TILE-002-LABEL: func @matmul_static(
-// TILE-002-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-002-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-002-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} {
-// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [10, 2] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<10x2xf32, strided<[?, 1], offset: ?>>
-// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [2, 12] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<2x12xf32, strided<[?, 1], offset: ?>>
-// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
-
-// TILE-234-LABEL: func @matmul_static(
-// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE-234-DAG: %[[C4:.*]] = arith.constant 4 : index
-// TILE-234-DAG: %[[C10:.*]] = arith.constant 10 : index
-// TILE-234-DAG: %[[C16:.*]] = arith.constant 16 : index
-// TILE-234-DAG: %[[C12:.*]] = arith.constant 12 : index
-// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[C10]] step %{{.*}} {
-// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[C12]] step %{{.*}} {
-// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} {
-// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [2, 4] [1, 1] : memref<10x16xf32, strided<[?, 1], offset: ?>> to memref<2x4xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [4, 3] [1, 1] : memref<16x12xf32, strided<[?, 1], offset: ?>> to memref<4x3xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [2, 3] [1, 1] : memref<10x12xf32, strided<[?, 1], offset: ?>> to memref<2x3xf32, strided<[?, 1], offset: ?>>
-//
-// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
-
-func.func @matvec(%arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>, %arg1: memref<?xf32, strided<[1], offset: ?>>, %arg2: memref<?xf32, strided<[1], offset: ?>>) {
- linalg.matvec
- ins(%arg0, %arg1: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- memref<?xf32, strided<[1], offset: ?>>)
- outs(%arg2: memref<?xf32, strided<[1], offset: ?>>)
- return
-}
-// TILE-2-LABEL: func @matvec(
-// TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
-// TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
-// TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
-// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]]
-
-// TILE-02-LABEL: func @matvec(
-// TILE-02-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
-// TILE-02-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
-// TILE-02-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
-// TILE-02-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-02-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
-// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
-// TILE-02: %[[szN_1:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
-// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
-
-// TILE-002-LABEL: func @matvec(
-// TILE-002-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
-// TILE-002-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
-// TILE-002-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
-// TILE-002-NOT: scf.for
-
-// TILE-234-LABEL: func @matvec(
-// TILE-234-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
-// TILE-234-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
-// TILE-234-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
-// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-234-DAG: %[[C3:.*]] = arith.constant 3 : index
-// TILE-234: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
-// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
-// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, strided<[?, 1], offset: ?>> to memref<?x?xf32, strided<[?, 1], offset: ?>>
-// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-//
-// TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]]
-
-func.func @dot(%arg0: memref<?xf32, strided<[1], offset: ?>>, %arg1: memref<?xf32, strided<[1], offset: ?>>, %arg2: memref<f32>) {
- linalg.dot
- ins(%arg0, %arg1: memref<?xf32, strided<[1], offset: ?>>, memref<?xf32, strided<[1], offset: ?>>)
- outs(%arg2: memref<f32>)
- return
-}
-// TILE-2-LABEL: func @dot(
-// TILE-2-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-2-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, strided<[1], offset: ?>>
-// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[szM_1:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
-
-// TILE-02-LABEL: func @dot(
-// TILE-02-NOT: scf.for
-
-// TILE-002-LABEL: func @dot(
-// TILE-002-NOT: scf.for
-
-// TILE-234-LABEL: func @dot(
-// TILE-234-DAG: %[[C0:.*]] = arith.constant 0 : index
-// TILE-234-DAG: %[[C2:.*]] = arith.constant 2 : index
-// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, strided<[1], offset: ?>>
-// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
-// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, strided<[1], offset: ?>> to memref<?xf32, strided<[1], offset: ?>>
-// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
-
-func.func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<127x99xf32>)
- return
-}
-// TILE-2-LABEL: func @fill_static
-// TILE-2: for
-// TILE-2-NOT: for
-// TILE-2: memref.subview{{.*}} : memref<127x99xf32>
-// TILE-2: linalg.fill{{.*}} : memref<?x99xf32, strided<[99, 1], offset: ?>>
-
-// TILE-02-LABEL: func @fill_static
-// TILE-02: for
-// TILE-02-NOT: for
-// TILE-02: memref.subview{{.*}} : memref<127x99xf32>
-// TILE-02: linalg.fill{{.*}} : memref<127x?xf32, strided<[99, 1], offset: ?>>
-
-// TILE-002-LABEL: func @fill_static
-// TILE-002-NOT: for
-// TILE-002: linalg.fill{{.*}} : memref<127x99xf32>
-
-// TILE-234-LABEL: func @fill_static
-// TILE-234: for
-// TILE-234: for
-// TILE-234-NOT: for
-// TILE-234: memref.subview{{.*}} : memref<127x99xf32>
-// TILE-234: linalg.fill{{.*}} : memref<?x3xf32, strided<[99, 1], offset: ?>>
-
-
-func.func @fill(%arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref<?x?xf32, strided<[?, 1], offset: ?>>)
- return
-}
-// TILE-2-LABEL: func @fill
-// TILE-2: for
-// TILE-2-NOT: for
-// TILE-2: fill{{.*}} f32
-
-// TILE-02-LABEL: func @fill
-// TILE-02: for
-// TILE-02-NOT: for
-// TILE-02: fill{{.*}} f32
-
-// TILE-002-LABEL: func @fill
-// TILE-002-NOT: for
-// TILE-002: fill{{.*}} f32
-
-// TILE-234-LABEL: func @fill
-// TILE-234: for
-// TILE-234: for
-// TILE-234-NOT: for
-// TILE-234: fill{{.*}} f32
-
-#id_2d = affine_map<(i, j) -> (i, j)>
-#pointwise_2d_trait = {
- args_in = 2,
- args_out = 1,
- indexing_maps = [#id_2d, #id_2d, #id_2d],
- iterator_types = ["parallel", "parallel"]
-}
-
-func.func @pointwise(%arg0: memref<?x?xf32, strided<[?, 1], offset: ?>>, %arg1: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- %arg2: memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- linalg.generic #pointwise_2d_trait
- ins(%arg0, %arg1 : memref<?x?xf32, strided<[?, 1], offset: ?>>, memref<?x?xf32, strided<[?, 1], offset: ?>>)
- outs(%arg2 : memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
- %4 = arith.addf %arg4, %arg5 : f32
- linalg.yield %4 : f32
- }
- return
-}
-// TILE-2-LABEL: func @pointwise
-// TILE-2: for
-// TILE-2-NOT: for
-// TILE-2: linalg.generic
-
-// TILE-02-LABEL: func @pointwise
-// TILE-02: for
-// TILE-02-NOT: for
-// TILE-02: linalg.generic
-
-// TILE-002-LABEL: func @pointwise
-// TILE-002-NOT: for
-// TILE-002: linalg.generic
-
-// TILE-234-LABEL: func @pointwise
-// TILE-234: for
-// TILE-234: for
-// TILE-234-NOT: for
-// TILE-234: linalg.generic
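The deleted tile.mlir exercised matmul, matvec, dot, fill, and pointwise tilings at sizes 2, 0,2, 0,0,2, and 2,3,4 through the `-linalg-tile` pass; under the transform dialect each configuration becomes an explicit schedule next to its payload. For instance, the TILE-234 matmul case maps to a schedule of this shape (a sketch, not a committed test):

    transform.sequence failures(propagate) {
      ^bb0(%arg1: !pdl.operation):
        %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
        // Tile (M, N, K) by (2, 3, 4); three non-zero sizes give three loop handles.
        %1, %loops:3 = transform.structured.tile %0 [2, 3, 4]
    }
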
diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir
index 3502f99581334..ad3271cd9a149 100644
--- a/mlir/test/Dialect/Linalg/transform-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir
@@ -1,20 +1,22 @@
-// RUN: mlir-opt %s -test-linalg-transform-patterns=test-patterns -split-input-file -test-transform-dialect-interpreter | FileCheck %s
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-linalg-transform-patterns=test-patterns -split-input-file | FileCheck %s
-// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1.
-// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
-// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)>
-// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
+// -----
func.func @dot(%x: memref<?xf32, strided<[1], offset: ?>>,
%y: memref<?xf32, strided<[1], offset: ?>>,
%v: memref<f32>) {
- linalg.dot { __internal_linalg_transform__ = "MEM" }
- ins(%x, %y: memref<?xf32, strided<[1], offset: ?>>,
- memref<?xf32, strided<[1], offset: ?>>)
- outs(%v: memref<f32>)
-
+ linalg.dot ins(%x, %y: memref<?xf32, strided<[1], offset: ?>>,
+ memref<?xf32, strided<[1], offset: ?>>)
+ outs(%v: memref<f32>)
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.dot"]} in %arg1
+ %1, %loop = transform.structured.tile %0 [8000]
+}
+
// CHECK-LABEL: func @dot
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
@@ -28,6 +30,8 @@ func.func @dot(%x: memref<?xf32, strided<[1], offset: ?>>,
// CHECK: arith.addf
// CHECK: store
+// -----
+
func.func @matvec(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
%x: memref<?xf32, strided<[1], offset: ?>>,
%y: memref<?xf32, strided<[1], offset: ?>>) {
@@ -37,25 +41,43 @@ func.func @matvec(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
outs(%y: memref<?xf32, strided<[1], offset: ?>>)
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1
+ %1, %loops:2 = transform.structured.tile %0 [5, 6]
+}
+
// CHECK-LABEL: func @matvec
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index
// CHECK-DAG: %[[c6:.*]] = arith.constant 6 : index
-// CHECK: scf.parallel {{.*}} step (%[[c5]])
+// CHECK: scf.for {{.*}} step %[[c5]]
// CHECK: scf.for {{.*}} step %[[c6]]
// CHECK: linalg.matvec
// CHECK: ins({{.*}}: memref<?x?xf32, strided<[?, 1], offset: ?>>, memref<?xf32, strided<[1], offset: ?>>)
// CHECK: outs({{.*}}: memref<?xf32, strided<[1], offset: ?>>)
+// -----
+
func.func @matmul(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
%B: memref<?x?xf32, strided<[?, 1], offset: ?>>,
%C: memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- linalg.matmul { __internal_linalg_transform__ = "MEM" }
- ins(%A, %B: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- memref<?x?xf32, strided<[?, 1], offset: ?>>)
- outs(%C: memref<?x?xf32, strided<[?, 1], offset: ?>>)
+ linalg.matmul ins(%A, %B: memref<?x?xf32, strided<[?, 1], offset: ?>>,
+ memref<?x?xf32, strided<[?, 1], offset: ?>>)
+ outs(%C: memref<?x?xf32, strided<[?, 1], offset: ?>>)
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000]
+ %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400]
+ %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40]
+ %4, %loops_4:3 = transform.structured.tile %3 [2, 3, 4]
+}
+
// CHECK-LABEL: func @matmul
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c2:.*]] = arith.constant 2 : index
@@ -86,6 +108,13 @@ func.func @matmul(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
// CHECK: ins({{.*}}: memref<?x?xf32, strided<[?, 1], offset: ?>>, memref<?x?xf32, strided<[?, 1], offset: ?>>)
// CHECK: outs({{.*}}: memref<?x?xf32, strided<[?, 1], offset: ?>>)
+// -----
+
+// Map corresponding to a 2D memory access where the stride along the last dim is known to be 1.
+// CHECK-DAG: #[[$kn:.*]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$nm:.*]] = affine_map<(d0, d1, d2) -> (d1, d0)>
+// CHECK-DAG: #[[$km:.*]] = affine_map<(d0, d1, d2) -> (d2, d0)>
+
#matmul_accesses = [
affine_map<(m, n, k) -> (m, k)>,
affine_map<(m, n, k) -> (k, n)>,
@@ -112,6 +141,7 @@ func.func @permute_generic(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
}
return
}
+
transform.with_pdl_patterns {
^bb0(%arg0: !pdl.operation):
transform.sequence %arg0 failures(propagate) {
@@ -120,6 +150,7 @@ transform.with_pdl_patterns {
transform.structured.interchange %0 { iterator_interchange = [1, 2, 0]}
}
}
+
// CHECK-LABEL: func @permute_generic
// CHECK: linalg.generic {
// CHECK-SAME: indexing_maps = [#[[$kn]], #[[$nm]], #[[$km]]],
@@ -129,15 +160,23 @@ transform.with_pdl_patterns {
// CHECK-SAME: memref<?x?xf32, strided<[?, 1], offset: ?>>
// CHECK-SAME: memref<?x?xf32, strided<[?, 1], offset: ?>>
+// -----
+
func.func @matvec_perm(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
%x: memref<?xf32, strided<[1], offset: ?>>,
%y: memref<?xf32, strided<[1], offset: ?>>) {
- linalg.matvec {__internal_linalg_transform__ = "__with_perm__"}
- ins(%A, %x: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- memref<?xf32, strided<[1], offset: ?>>)
- outs(%y: memref<?xf32, strided<[1], offset: ?>>)
+ linalg.matvec ins(%A, %x: memref<?x?xf32, strided<[?, 1], offset: ?>>,
+ memref<?xf32, strided<[1], offset: ?>>)
+ outs(%y: memref<?xf32, strided<[1], offset: ?>>)
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1
+ %1, %loops:2 = transform.structured.tile %0 [5, 6] {interchange = [1, 0]}
+}
+
// CHECK-LABEL: func @matvec_perm
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index
@@ -148,15 +187,25 @@ func.func @matvec_perm(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
// CHECK: ins({{.*}}: memref<?x?xf32, strided<[?, 1], offset: ?>>, memref<?xf32, strided<[1], offset: ?>>)
// CHECK: outs({{.*}}: memref<?xf32, strided<[1], offset: ?>>)
+// -----
+
func.func @matmul_perm(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
%B: memref<?x?xf32, strided<[?, 1], offset: ?>>,
%C: memref<?x?xf32, strided<[?, 1], offset: ?>>) {
- linalg.matmul {__internal_linalg_transform__ = "__with_perm__"}
- ins(%A, %B: memref<?x?xf32, strided<[?, 1], offset: ?>>,
- memref<?x?xf32, strided<[?, 1], offset: ?>>)
- outs(%C : memref<?x?xf32, strided<[?, 1], offset: ?>>)
+ linalg.matmul ins(%A, %B: memref<?x?xf32, strided<[?, 1], offset: ?>>,
+ memref<?x?xf32, strided<[?, 1], offset: ?>>)
+ outs(%C : memref<?x?xf32, strided<[?, 1], offset: ?>>)
return
}
+
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [2000, 3000, 4000] {interchange=[1, 2, 0]}
+ %2, %loops_2:3 = transform.structured.tile %1 [200, 300, 400] {interchange=[1, 0, 2]}
+ %3, %loops_3:3 = transform.structured.tile %2 [20, 30, 40]
+}
+
// CHECK-LABEL: func @matmul_perm
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c20:.*]] = arith.constant 20 : index
@@ -180,26 +229,3 @@ func.func @matmul_perm(%A: memref<?x?xf32, strided<[?, 1], offset: ?>>,
// CHECK: linalg.matmul
// CHECK: ins({{.*}}: memref<?x?xf32, strided<[?, 1], offset: ?>>, memref<?x?xf32, strided<[?, 1], offset: ?>>)
// CHECK: outs({{.*}}: memref<?x?xf32, strided<[?, 1], offset: ?>>)
-
-func.func @tile_permute_parallel_loop(%arg0: memref<?x?xf32>,
- %arg1: memref<?x?xf32>,
- %arg2: memref<?x?xf32>) {
- linalg.matmul {__internal_linalg_transform__ = "par__with_perm__"}
- ins(%arg0, %arg1: memref<?x?xf32>, memref<?x?xf32>)
- outs(%arg2: memref<?x?xf32>)
- return
-}
-// CHECK-LABEL: func @tile_permute_parallel_loop
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: memref<?x?xf32>
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: memref<?x?xf32>
-// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[D0:.*]] = memref.dim %[[ARG0]], %c0
-// CHECK-DAG: %[[D1:.*]] = memref.dim %[[ARG0]], %c1
-// CHECK-DAG: %[[D2:.*]] = memref.dim %[[ARG1]], %c1
-// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D2]]) step (%[[C8]])
-// CHECK: scf.for %{{.*}} = %[[C0]] to %[[D1]] step %[[C4]]
-// CHECK: scf.parallel (%{{.*}}) = (%[[C0]]) to (%[[D0]]) step (%[[C16]])
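
The replacement recipe in this file is mechanical: drop the `__internal_linalg_transform__` filter attribute and append a `transform.sequence` that matches the op by name and tiles it. A minimal sketch, assuming a module whose only payload op is a `linalg.matmul`:

transform.sequence failures(propagate) {
  ^bb0(%arg1: !pdl.operation):
    // Match the payload op by name, then tile all three loops.
    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
    %1, %loops:3 = transform.structured.tile %0 [2, 3, 4]
}

`%1` is a handle to the tiled op; feeding it into another `transform.structured.tile` builds a multi-level tiling chain, as `@matmul_perm` above does.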
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
index 10c9adb7a04f8..d79b402b1f1fd 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=4" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
@@ -24,6 +24,12 @@ func.func @conv_1d(%arg0: memref<?xf32>, %arg1: memref<?xf32>, %arg2: memref<?xf
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_1d"]} in %arg1
+ %1, %loop = transform.structured.tile %0 [4]
+}
+
func.func @main() {
%c3 = arith.constant 3 : index
%c6 = arith.constant 6 : index
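
The number of loop handles bound by `transform.structured.tile` equals the number of loops actually generated: the single size `[4]` above binds one `%loop`. A sketch of a hypothetical two-dimensional variant (the sizes are illustrative, not part of this test):

// Two non-zero tile sizes produce two loops, hence two handles.
%1, %loops:2 = transform.structured.tile %0 [4, 8]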
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
index 7d6e47b5f68e2..cb9a033883efd 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
@@ -26,6 +26,12 @@ func.func @conv_1d_nwc_wcf(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_1d_nwc_wcf"]} in %arg1
+ %1, %loops:2 = transform.structured.tile %0 [2, 4]
+}
+
func.func @main() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
index 21d2a1939a018..78175ba2f86a4 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
@@ -24,6 +24,12 @@ func.func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_2d"]} in %arg1
+ %1, %loops:2 = transform.structured.tile %0 [2, 2]
+}
+
func.func @main() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
index 51708ea589607..b675f870a7e04 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
@@ -26,6 +26,12 @@ func.func @conv_2d_nhwc_hwcf(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_2d_nhwc_hwcf"]} in %arg1
+ %1, %loops:4 = transform.structured.tile %0 [2, 3, 3, 2]
+}
+
func.func @main() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
index 15cf0031094f5..361869e666834 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=2,2,2" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
@@ -24,6 +24,12 @@ func.func @conv_3d(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: me
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_3d"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [2, 2, 2]
+}
+
func.func @main() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
index a4a51b8de42ac..d7245d36c1323 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
@@ -1,9 +1,9 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-cf \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \
@@ -26,6 +26,11 @@ func.func @conv_3d_ndhwc_dhwcf(%arg0: memref<?x?x?x?x?xf32>, %arg1: memref<?x?x?
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.conv_3d_ndhwc_dhwcf"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5]
+}
func.func @main() {
%c0 = arith.constant 0 : index
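
A tile size of 0 leaves the corresponding dimension untiled, which is why the `[0, 5, 5, 5]` schedule above binds only three loop handles. A sketch with the skipped dimension annotated:

// Size 0 on the leading dimension: no loop is produced for it.
%1, %loops:3 = transform.structured.tile %0 [0, 5, 5, 5]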
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
index a7f2b415f1f49..c35ad80505f76 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -1,12 +1,12 @@
// UNSUPPORTED: asan
-// RUN: mlir-opt %s -linalg-bufferize -arith-bufferize \
+// RUN: mlir-opt %s -test-transform-dialect-erase-schedule -linalg-bufferize -arith-bufferize \
// RUN: -tensor-bufferize -func-bufferize -finalizing-bufferize -buffer-deallocation -convert-linalg-to-loops -convert-scf-to-cf \
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-cf --convert-memref-to-llvm -convert-func-to-llvm -reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext \
// RUN: | FileCheck %s
-// RUN: mlir-opt %s -linalg-tile="tile-sizes=1,2,3" -linalg-bufferize \
+// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule -linalg-bufferize \
// RUN: -scf-bufferize -arith-bufferize -tensor-bufferize \
// RUN: -func-bufferize \
// RUN: -finalizing-bufferize -convert-linalg-to-loops -convert-scf-to-cf -convert-scf-to-cf \
@@ -36,4 +36,10 @@ func.func @main() {
return
}
+transform.sequence failures(propagate) {
+ ^bb0(%arg1: !pdl.operation):
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ %1, %loops:3 = transform.structured.tile %0 [1, 2, 3]
+}
+
func.func private @printMemrefF32(%ptr : tensor<*xf32>)
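
For tensor payloads such as this matmul, the placement of the passes in the RUN lines matters: the first RUN line erases the schedule without applying it, exercising the untiled path, while the second applies it via the interpreter and then erases it so that bufferization and lowering only ever see payload IR. In sketch form (the trailing passes are elided):

// RUN: mlir-opt %s -test-transform-dialect-interpreter -test-transform-dialect-erase-schedule ...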
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index 781936f56f61c..ad27637c49ea3 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -61,10 +61,6 @@ struct TestLinalgTransforms
Option<bool> testPatterns{*this, "test-patterns",
llvm::cl::desc("Test a mixed set of patterns"),
llvm::cl::init(false)};
- Option<bool> testTileAndDistributionOptions{
- *this, "test-tile-and-distribute-options",
- llvm::cl::desc("Test tile and distribute options"),
- llvm::cl::init(false)};
Option<bool> testVectorTransferForwardingPatterns{
*this, "test-vector-transfer-forwarding-patterns",
llvm::cl::desc(
@@ -75,13 +71,6 @@ struct TestLinalgTransforms
llvm::cl::desc("Test a set of patterns that rewrite a linalg contraction "
"in vector.contract form"),
llvm::cl::init(false)};
- Option<bool> testTilePattern{*this, "test-tile-pattern",
- llvm::cl::desc("Test tile pattern"),
- llvm::cl::init(false)};
- Option<bool> testTileScalarizeDynamicDims{
- *this, "test-tile-scalarize-dynamic-dims",
- llvm::cl::desc("Test tiling of dynamic dims by 1"),
- llvm::cl::init(false)};
Option<bool> testTransformPadTensor{
*this, "test-transform-pad-tensor",
llvm::cl::desc("Test transform pad tensor by copying with generic ops"),
@@ -135,91 +124,12 @@ static void applyPatterns(func::FuncOp funcOp) {
MLIRContext *ctx = funcOp.getContext();
RewritePatternSet patterns(ctx);
- //===--------------------------------------------------------------------===//
- // Linalg tiling patterns.
- //===--------------------------------------------------------------------===//
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({2000, 3000, 4000}),
- LinalgTransformationFilter(StringAttr::get(ctx, "MEM"),
- StringAttr::get(ctx, "L3")));
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({200, 300, 400}),
- LinalgTransformationFilter(StringAttr::get(ctx, "L3"),
- StringAttr::get(ctx, "L2")));
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({20, 30, 40}),
- LinalgTransformationFilter(StringAttr::get(ctx, "L2"),
- StringAttr::get(ctx, "L1")));
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({2, 3, 4}),
- LinalgTransformationFilter(StringAttr::get(ctx, "L1"),
- StringAttr::get(ctx, "REG")));
-
- patterns.add<LinalgTilingPattern>(
- MatvecOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({5, 6}).setLoopType(
- LinalgTilingLoopType::ParallelLoops),
- LinalgTransformationFilter(ArrayRef<StringAttr>{},
- StringAttr::get(ctx, "L1")));
-
- patterns.add<LinalgTilingPattern>(
- DotOp::getOperationName(), ctx, LinalgTilingOptions().setTileSizes(8000),
- LinalgTransformationFilter(
- ArrayRef<StringAttr>{StringAttr::get(ctx, "MEM"),
- StringAttr::get(ctx, "L3"),
- StringAttr::get(ctx, "L2")},
- StringAttr::get(ctx, "REG")));
-
- //===--------------------------------------------------------------------===//
- // Linalg tiling and permutation patterns.
- //===--------------------------------------------------------------------===//
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions()
- .setTileSizes({2000, 3000, 4000})
- .setInterchange({1, 2, 0}),
- LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"),
- StringAttr::get(ctx, "L2__with_perm__")));
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions()
- .setTileSizes({200, 300, 400})
- .setInterchange({1, 0, 2}),
- LinalgTransformationFilter(StringAttr::get(ctx, "L2__with_perm__"),
- StringAttr::get(ctx, "L1__with_perm__")));
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({20, 30, 40}),
- LinalgTransformationFilter(StringAttr::get(ctx, "L1__with_perm__"),
- StringAttr::get(ctx, "REG__with_perm__")));
-
- patterns.add<LinalgTilingPattern>(
- MatvecOp::getOperationName(), ctx,
- LinalgTilingOptions().setTileSizes({5, 6}).setInterchange({1, 0}),
- LinalgTransformationFilter(StringAttr::get(ctx, "__with_perm__"),
- StringAttr::get(ctx, "L1__with_perm__")));
-
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), ctx,
- LinalgTilingOptions()
- .setTileSizes({16, 8, 4})
- .setInterchange({1, 2, 0})
- .setLoopType(LinalgTilingLoopType::ParallelLoops),
- LinalgTransformationFilter(
- StringAttr::get(ctx, "par__with_perm__"),
- StringAttr::get(ctx, "after_par__with_perm__")));
-
//===--------------------------------------------------------------------===//
// Linalg to loops patterns.
//===--------------------------------------------------------------------===//
patterns.add<LinalgLoweringPattern<DotOp>>(
ctx,
- /*loweringType=*/LinalgLoweringType::Loops,
- LinalgTransformationFilter(StringAttr::get(ctx, "REG")));
+ /*loweringType=*/LinalgLoweringType::Loops);
//===--------------------------------------------------------------------===//
// Linalg distribution patterns.
@@ -239,178 +149,6 @@ static void applyPatterns(func::FuncOp funcOp) {
});
}
-template <typename IdOp, typename NProcsOp>
-static SmallVector<ProcInfo, 2>
-getGpuProcIds(OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges,
- ArrayRef<linalg::DistributionMethod> distributionMethod) {
- size_t count = std::min<size_t>(3, parallelLoopRanges.size());
- SmallVector<ProcInfo, 2> procInfo(count);
- Type indexType = b.getIndexType();
- for (unsigned i = 0; i < count; ++i) {
- gpu::Dimension dim = *gpu::symbolizeDimension(i);
- procInfo[count - 1 - i] = {b.create<IdOp>(loc, indexType, dim),
- b.create<NProcsOp>(loc, indexType, dim),
- distributionMethod[count - 1 - i]};
- }
- return procInfo;
-}
-
-static void fillTileAndDistributePatterns(MLIRContext *context,
- RewritePatternSet &patterns) {
- {
- LinalgLoopDistributionOptions cyclicNprocsEqNiters;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::CyclicNumProcsEqNumIters,
- DistributionMethod::CyclicNumProcsEqNumIters};
- cyclicNprocsEqNiters.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::ParallelLoops)
- .setDistributionOptions(cyclicNprocsEqNiters),
- LinalgTransformationFilter(
- StringAttr::get(context, "distribute1"),
- StringAttr::get(context, "after_distribute1")));
- }
-
- {
- LinalgLoopDistributionOptions cyclicNprocsGeNiters;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::CyclicNumProcsGeNumIters,
- DistributionMethod::CyclicNumProcsGeNumIters};
- cyclicNprocsGeNiters.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::ParallelLoops)
- .setDistributionOptions(cyclicNprocsGeNiters),
- LinalgTransformationFilter(
- StringAttr::get(context, "distribute2"),
- StringAttr::get(context, "after_distribute2")));
- }
-
- {
- LinalgLoopDistributionOptions cyclicNprocsDefault;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::Cyclic, DistributionMethod::Cyclic};
- cyclicNprocsDefault.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::ParallelLoops)
- .setDistributionOptions(cyclicNprocsDefault),
- LinalgTransformationFilter(
- StringAttr::get(context, "distribute3"),
- StringAttr::get(context, "after_distribute3")));
- }
-
- {
- LinalgLoopDistributionOptions cyclicNprocsMixed1;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::CyclicNumProcsEqNumIters,
- DistributionMethod::CyclicNumProcsGeNumIters};
- cyclicNprocsMixed1.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::ParallelLoops)
- .setDistributionOptions(cyclicNprocsMixed1),
- LinalgTransformationFilter(
- StringAttr::get(context, "distribute4"),
- StringAttr::get(context, "after_distribute4")));
- }
-
- {
- LinalgLoopDistributionOptions cyclicNprocsMixed2;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::CyclicNumProcsGeNumIters,
- DistributionMethod::Cyclic};
- cyclicNprocsMixed2.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::ParallelLoops)
- .setDistributionOptions(cyclicNprocsMixed2),
- LinalgTransformationFilter(
- StringAttr::get(context, "distribute5"),
- StringAttr::get(context, "after_distribute5")));
- }
-
- {
- LinalgLoopDistributionOptions cyclicNprocsMixed3;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::Cyclic,
- DistributionMethod::CyclicNumProcsEqNumIters};
- cyclicNprocsMixed3.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
-
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::ParallelLoops)
- .setDistributionOptions(cyclicNprocsMixed3),
- LinalgTransformationFilter(
- StringAttr::get(context, "distribute6"),
- StringAttr::get(context, "after_distribute6")));
- }
-
- {
- LinalgLoopDistributionOptions cyclicNprocsEqNiters;
- SmallVector<linalg::DistributionMethod> distributionMethod = {
- DistributionMethod::Cyclic, DistributionMethod::Cyclic};
- cyclicNprocsEqNiters.procInfo =
- [distributionMethod](OpBuilder &b, Location loc,
- ArrayRef<Range> parallelLoopRanges) {
- return getGpuProcIds<gpu::BlockIdOp, gpu::GridDimOp>(
- b, loc, parallelLoopRanges, distributionMethod);
- };
- patterns.add<LinalgTilingPattern>(
- MatmulOp::getOperationName(), context,
- LinalgTilingOptions()
- .setTileSizes({8, 8, 4})
- .setLoopType(LinalgTilingLoopType::Loops)
- .setDistributionOptions(cyclicNprocsEqNiters),
- LinalgTransformationFilter(
- StringAttr::get(context, "tensors_distribute1"),
- StringAttr::get(context, "tensors_after_distribute1")));
- }
-}
-
static void applyVectorTransferForwardingPatterns(func::FuncOp funcOp) {
RewritePatternSet forwardPattern(funcOp.getContext());
forwardPattern.add<LinalgCopyVTRForwardingPattern>(funcOp.getContext());
@@ -445,33 +183,6 @@ static void applyExtractSliceOfPadTensorSwapPattern(func::FuncOp funcOp) {
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
}
-static void applyTilePattern(func::FuncOp funcOp, const std::string &loopType,
- ArrayRef<int64_t> tileSizes,
- ArrayRef<int64_t> peeledLoops,
- bool scalarizeDynamicDims) {
- MLIRContext *context = funcOp.getContext();
- RewritePatternSet tilingPattern(context);
- LinalgTilingLoopType type =
- llvm::StringSwitch<LinalgTilingLoopType>(loopType)
- .Case("for", LinalgTilingLoopType::Loops)
- .Case("affine", LinalgTilingLoopType::AffineLoops)
- .Case("parallel", LinalgTilingLoopType::ParallelLoops);
- auto linalgTilingOptions = linalg::LinalgTilingOptions()
- .setPeeledLoops(peeledLoops)
- .setLoopType(type);
- if (scalarizeDynamicDims) {
- linalgTilingOptions.scalarizeDynamicDims();
- assert(tileSizes.empty() &&
- "tileSizes and scalarizeDynamicDims is mutually exclusive");
- } else {
- linalgTilingOptions.setTileSizes(tileSizes);
- }
- linalg::LinalgTransformationFilter f(StringAttr::get(context, "tile"));
- TilingPatterns<linalg::MatmulOp, linalg::GenericOp>::insert(
- tilingPattern, linalgTilingOptions, f);
- (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern));
-}
-
static void applySplitReduction(func::FuncOp funcOp) {
RewritePatternSet patterns(funcOp.getContext());
linalg::populateSplitReductionPattern(
@@ -521,12 +232,6 @@ void TestLinalgTransforms::runOnOperation() {
};
std::unique_ptr<void, decltype(lambda)> cleanupGuard{(void *)1, lambda};
- if (testTileAndDistributionOptions) {
- RewritePatternSet patterns(&getContext());
- fillTileAndDistributePatterns(&getContext(), patterns);
- (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
- return;
- }
if (testPatterns)
return applyPatterns(getOperation());
if (testVectorTransferForwardingPatterns)
@@ -539,12 +244,6 @@ void TestLinalgTransforms::runOnOperation() {
return applyGeneralizePadTensorPatterns(getOperation());
if (testSwapSubTensorPadTensor)
return applyExtractSliceOfPadTensorSwapPattern(getOperation());
- if (testTilePattern)
- return applyTilePattern(getOperation(), loopType, tileSizes, peeledLoops,
- /*scalarizeDynamicDims=*/false);
- if (testTileScalarizeDynamicDims)
- return applyTilePattern(getOperation(), loopType, tileSizes,
- /*peeledLoops=*/{}, /*scalarizeDynamicDims=*/true);
if (testSplitReduction)
return applySplitReduction(getOperation());
if (testSplitReductionInnerParallel)
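
The cache-level chains that the removed C++ patterns encoded with MEM/L3/L2/L1/REG filter attributes are now expressed by chaining handles instead: each `transform.structured.tile` returns a handle to the tiled op that the next level consumes. A minimal sketch, assuming `%0` is a handle to a `linalg.matmul`:

// Each level tiles the result of the previous one.
%1, %l1:3 = transform.structured.tile %0 [2000, 3000, 4000]
%2, %l2:3 = transform.structured.tile %1 [200, 300, 400]
%3, %l3:3 = transform.structured.tile %2 [20, 30, 40]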
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
index e74be0d67a676..ad5dcab9c184e 100644
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectInterpreter.cpp
@@ -57,10 +57,39 @@ class TestTransformDialectInterpreterPass
llvm::cl::desc("perform expensive checks to better report errors in the "
"transform IR")};
};
+
+struct TestTransformDialectEraseSchedulePass
+ : public PassWrapper<TestTransformDialectEraseSchedulePass,
+ OperationPass<ModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
+ TestTransformDialectEraseSchedulePass)
+
+ StringRef getArgument() const final {
+ return "test-transform-dialect-erase-schedule";
+ }
+
+ StringRef getDescription() const final {
+ return "erase transform dialect schedule from the IR";
+ }
+
+ void runOnOperation() override {
+ getOperation()->walk<WalkOrder::PreOrder>([&](Operation *nestedOp) {
+ if (isa<transform::TransformOpInterface>(nestedOp)) {
+ nestedOp->erase();
+ return WalkResult::skip();
+ }
+ return WalkResult::advance();
+ });
+ }
+};
} // namespace
namespace mlir {
namespace test {
+/// Registers the test pass for erasing transform dialect ops.
+void registerTestTransformDialectEraseSchedulePass() {
+ PassRegistration<TestTransformDialectEraseSchedulePass> reg;
+}
/// Registers the test pass for applying transform dialect ops.
void registerTestTransformDialectInterpreterPass() {
PassRegistration<TestTransformDialectInterpreterPass> reg;
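
The pre-order walk in the new pass erases each transform op as a whole and returns `WalkResult::skip()` so that it does not recurse into the region it just erased. A minimal sketch of the effect on a module mixing payload and schedule (the `@payload` name is hypothetical):

module {
  func.func @payload() { return }           // kept
  transform.sequence failures(propagate) {  // erased, region and all
    ^bb0(%arg0: !pdl.operation):
  }
}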
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 37d331b1000d5..9eb0a47558dda 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -114,6 +114,7 @@ void registerTestSliceAnalysisPass();
void registerTestTensorTransforms();
void registerTestTilingInterface();
void registerTestTopologicalSortAnalysisPass();
+void registerTestTransformDialectEraseSchedulePass();
void registerTestTransformDialectInterpreterPass();
void registerTestVectorLowerings();
void registerTestNvgpuLowerings();
@@ -214,6 +215,7 @@ void registerTestPasses() {
mlir::test::registerTestTensorTransforms();
mlir::test::registerTestTilingInterface();
mlir::test::registerTestTopologicalSortAnalysisPass();
+ mlir::test::registerTestTransformDialectEraseSchedulePass();
mlir::test::registerTestTransformDialectInterpreterPass();
mlir::test::registerTestVectorLowerings();
mlir::test::registerTestNvgpuLowerings();