[Mlir-commits] [mlir] 004a3d4 - [mlir][Linalg] Refactor linalg tiling

Nicolas Vasilache llvmlistbot at llvm.org
Wed May 20 06:42:09 PDT 2020


Author: Nicolas Vasilache
Date: 2020-05-20T09:39:56-04:00
New Revision: 004a3d4f56c26a30d2a65818e2bfc03b74ec8123

URL: https://github.com/llvm/llvm-project/commit/004a3d4f56c26a30d2a65818e2bfc03b74ec8123
DIFF: https://github.com/llvm/llvm-project/commit/004a3d4f56c26a30d2a65818e2bfc03b74ec8123.diff

LOG: [mlir][Linalg] Refactor linalg tiling

Summary:
This revision refactors the Linalg tiling pass to be written as pattern applications and retires the use of the folder in Linalg tiling.
In the early days, tiling was written as a pass that would create (partially) folded and canonicalized operations on the fly, for better composability.
As this evolves towards a composition of patterns, the pass-specific folder is counterproductive and is retired.
The tiling options struct now takes a tile-size computation function, which allows materializing tile sizes (in particular constant tile sizes) on the fly, at the point where tiling needs them. This plays better with folding and DCE.
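
For illustration only (not part of this commit), a minimal sketch of driving the new options-based API programmatically; `linalgOp` is a placeholder for a pre-existing op implementing LinalgOp:

    // Build tile sizes lazily, where tiling needs them, so they can fold.
    TileSizeComputationFunction tileSizeFn = [](OpBuilder &b, Operation *op) {
      // Tile the first loop by 2; missing entries are padded with 0 (untiled).
      Value c2 = b.create<ConstantIndexOp>(op->getLoc(), 2);
      return SmallVector<Value, 4>{c2};
    };
    LinalgTilingOptions options;
    options.setTileSizeComputationFunction(tileSizeFn)
        .setLoopType(LinalgTilingLoopType::Loops);
    // For constant sizes, the convenience overload builds such a function:
    //   options.setTileSizes({2, 3, 4});
    OpBuilder b(linalgOp);
    Optional<TiledLinalgOp> tiled = tileLinalgOp(b, linalgOp, options);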

With the folder going away in tiling, the check in Linalg fusion on whether two subviews are the same needs to be more robust than plain SSA-value equality. This revision also implements such a structural check (see `isSameSubView` below).

In the current form, there are still some canonicalizations missing for AffineMin/Max ops fed by scf::ForOp induction variables. These will be improved at a later time.
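
When tiling is invoked programmatically through the patterns, the canonicalizations that the pass applies automatically must be applied by hand. A hedged sketch mirroring applyTilingToLoopPatterns from the diff below; `funcOp`, `options`, and the choice of linalg.matmul are placeholders:

    MLIRContext *ctx = funcOp.getContext();
    OwningRewritePatternList patterns;
    // Tile every linalg.matmul once; the marker prevents re-application.
    patterns.insert<LinalgTilingPattern<linalg::MatmulOp>>(
        ctx, options, LinalgMarker({}, "tiled"));
    applyPatternsAndFoldGreedily(funcOp, patterns);
    // Clean up the affine.min/max, scf and subview ops created by tiling.
    applyPatternsAndFoldGreedily(funcOp,
                                 getLinalgTilingCanonicalizationPatterns(ctx));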

Differential Revision: https://reviews.llvm.org/D80267

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
    mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
    mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
    mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
    mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
    mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
    mlir/test/Dialect/Linalg/tile.mlir
    mlir/test/Dialect/Linalg/tile_conv.mlir
    mlir/test/Dialect/Linalg/tile_conv_padding.mlir
    mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
    mlir/test/Dialect/Linalg/tile_parallel.mlir
    mlir/test/lib/Transforms/TestLinalgTransforms.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
index f580161c3ccb..bb9933798338 100644
--- a/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
+++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Builders.h
@@ -78,6 +78,8 @@ template <typename LoopTy> class GenericLoopNestRangeBuilder {
 public:
   GenericLoopNestRangeBuilder(MutableArrayRef<Value> ivs,
                               ArrayRef<Value> ranges);
+  GenericLoopNestRangeBuilder(MutableArrayRef<Value> ivs,
+                              ArrayRef<SubViewOp::Range> ranges);
   void operator()(std::function<void(void)> fun = nullptr) { (*builder)(fun); }
 
 private:

diff  --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index e93977185fb3..6d34a0943e5e 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -16,6 +16,8 @@
 namespace mlir {
 namespace linalg {
 
+struct LinalgTilingOptions;
+
 //===----------------------------------------------------------------------===//
 // Transformations exposed as function calls.
 //===----------------------------------------------------------------------===//
@@ -34,10 +36,6 @@ struct TiledLinalgOp {
 /// An empty vector is interpreted as the identity permutation and the
 /// transformation returns early.
 ///
-/// When non-null, the optional pointer `folder` is used to call into the
-/// `createAndFold` builder method. If `folder` is null, the regular `create`
-/// method is called.
-///
 /// Returns a struct containing the tiled loops in the specified order
 /// and the cloned op if successful, llvm::None otherwise.
 ///
@@ -46,26 +44,7 @@ struct TiledLinalgOp {
 /// integers, in the range 0..`tileSizes.size()` without duplications
 /// (i.e. `[1,1,2]` is an invalid permutation).
 Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
-                                     ArrayRef<Value> tileSizes,
-                                     ArrayRef<unsigned> interchangeVector = {},
-                                     OperationFolder *folder = nullptr);
-Optional<TiledLinalgOp>
-tileLinalgOpToParallelLoops(OpBuilder &b, LinalgOp op,
-                            ArrayRef<Value> tileSizes,
-                            ArrayRef<unsigned> interchangeVector = {},
-                            OperationFolder *folder = nullptr);
-
-/// Performs standalone tiling of a single LinalgOp by constant `tileSizes`.
-/// See `tileLinalgOp(... ArrayRef<Value> tileSizes,)` for more details
-Optional<TiledLinalgOp> tileLinalgOp(OpBuilder &b, LinalgOp op,
-                                     ArrayRef<int64_t> tileSizes,
-                                     ArrayRef<unsigned> interchangeVector = {},
-                                     OperationFolder *folder = nullptr);
-Optional<TiledLinalgOp>
-tileLinalgOpToParallelLoops(OpBuilder &b, LinalgOp op,
-                            ArrayRef<int64_t> tileSizes,
-                            ArrayRef<unsigned> interchangeVector = {},
-                            OperationFolder *folder = nullptr);
+                                     const LinalgTilingOptions &options);
 
 /// Interchanges the `iterator_types` and `iterator_maps` dimensions of `op`.
 /// This is an in-place transformation controlled by `interchangeVector`.
@@ -203,15 +182,34 @@ struct LinalgMarker {
 enum class LinalgTilingLoopType {
   Loops = 0,
   AffineLoops = 1,
-  ParallelLoops = 2
+  ParallelLoops = 2,
 };
+using TileSizeComputationFunction =
+    std::function<SmallVector<Value, 4>(OpBuilder &, Operation *)>;
 struct LinalgTilingOptions {
-  /// The tile sizes by which to tile.
-  SmallVector<int64_t, 4> tileSizes{};
-  LinalgTilingOptions &setTileSizes(ArrayRef<int64_t> ts) {
-    tileSizes.assign(ts.begin(), ts.end());
+  /// Computation function that returns the tile sizes for each operation.
+  /// Delayed construction of constant tile sizes should occur to interoperate
+  /// with folding.
+  TileSizeComputationFunction tileSizeComputationFunction = nullptr;
+  LinalgTilingOptions &
+  setTileSizeComputationFunction(TileSizeComputationFunction &fun) {
+    tileSizeComputationFunction = fun;
     return *this;
   }
+  /// Set the `tileSizeComputationFunction` to return the values `ts`. The
+  /// values must not fold away when tiling. Otherwise, use a more robust
+  /// `tileSizeComputationFunction`.
+  LinalgTilingOptions &setTileSizes(ValueRange ts) {
+    tileSizeComputationFunction = [&](OpBuilder &, Operation *) {
+      return SmallVector<Value, 4>(ts.begin(), ts.end());
+    };
+    return *this;
+  }
+  /// Convenience function to set the `tileSizeComputationFunction` to a
+  /// function that computes tile sizes at the point they are needed. Allows
+  /// proper interaction with folding.
+  LinalgTilingOptions &setTileSizes(ArrayRef<int64_t> ts);
+
   /// The interchange vector to reorder the tiled loops.
   SmallVector<unsigned, 4> interchangeVector{};
   LinalgTilingOptions &setInterchange(ArrayRef<unsigned> interchange) {
@@ -226,6 +224,12 @@ struct LinalgTilingOptions {
   }
 };
 
+/// Canonicalization patterns relevant to apply after tiling patterns. These are
+/// applied automatically by the tiling pass but need to be applied manually
+/// when tiling is called programmatically.
+OwningRewritePatternList
+getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx);
+
 struct LinalgBaseTilingPattern : public RewritePattern {
   LinalgBaseTilingPattern(StringRef opName, MLIRContext *context,
                           LinalgTilingOptions options,

diff  --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
index 33d2eeb9228b..afc572c16bbf 100644
--- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
+++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp
@@ -78,6 +78,60 @@ Value LoopNestRangeBuilder::LoopNestRangeBuilder::operator()(
 namespace mlir {
 namespace edsc {
 
+static void unpackRanges(ArrayRef<Value> rangeOps, SmallVectorImpl<Value> &lbs,
+                         SmallVectorImpl<Value> &ubs,
+                         SmallVectorImpl<Value> &steps) {
+  for (Value range : rangeOps) {
+    assert(range.getType() && "expected linalg.range type");
+    assert(range.getDefiningOp() && "need operations to extract range parts");
+    RangeOp rangeOp = cast<RangeOp>(range.getDefiningOp());
+    lbs.emplace_back(rangeOp.min());
+    ubs.emplace_back(rangeOp.max());
+    steps.emplace_back(rangeOp.step());
+  }
+}
+
+static void unpackRanges(ArrayRef<SubViewOp::Range> ranges,
+                         SmallVectorImpl<Value> &lbs,
+                         SmallVectorImpl<Value> &ubs,
+                         SmallVectorImpl<Value> &steps) {
+  for (SubViewOp::Range range : ranges) {
+    lbs.emplace_back(range.offset);
+    ubs.emplace_back(range.size);
+    steps.emplace_back(range.stride);
+  }
+}
+
+template <>
+GenericLoopNestRangeBuilder<scf::ForOp>::GenericLoopNestRangeBuilder(
+    MutableArrayRef<Value> ivs, ArrayRef<SubViewOp::Range> ranges) {
+  builder = std::make_unique<LoopNestRangeBuilder>(ivs, ranges);
+}
+
+template <>
+GenericLoopNestRangeBuilder<AffineForOp>::GenericLoopNestRangeBuilder(
+    MutableArrayRef<Value> ivs, ArrayRef<SubViewOp::Range> ranges) {
+  SmallVector<Value, 4> lbs, ubs, steps;
+  unpackRanges(ranges, lbs, ubs, steps);
+  SmallVector<int64_t, 4> constantSteps;
+  constantSteps.reserve(steps.size());
+  for (Value v : steps) {
+    auto op = v.getDefiningOp<ConstantIndexOp>();
+    assert(op && "Affine loops require constant steps");
+    constantSteps.push_back(op.getValue());
+  }
+  builder =
+      std::make_unique<AffineLoopNestBuilder>(ivs, lbs, ubs, constantSteps);
+}
+
+template <>
+GenericLoopNestRangeBuilder<scf::ParallelOp>::GenericLoopNestRangeBuilder(
+    MutableArrayRef<Value> ivs, ArrayRef<SubViewOp::Range> ranges) {
+  SmallVector<Value, 4> lbs, ubs, steps;
+  unpackRanges(ranges, lbs, ubs, steps);
+  builder = std::make_unique<ParallelLoopNestBuilder>(ivs, lbs, ubs, steps);
+}
+
 template <>
 GenericLoopNestRangeBuilder<scf::ForOp>::GenericLoopNestRangeBuilder(
     MutableArrayRef<Value> ivs, ArrayRef<Value> ranges) {
@@ -87,32 +141,24 @@ GenericLoopNestRangeBuilder<scf::ForOp>::GenericLoopNestRangeBuilder(
 template <>
 GenericLoopNestRangeBuilder<AffineForOp>::GenericLoopNestRangeBuilder(
     MutableArrayRef<Value> ivs, ArrayRef<Value> ranges) {
-  SmallVector<Value, 4> lbs;
-  SmallVector<Value, 4> ubs;
-  SmallVector<int64_t, 4> steps;
-  for (Value range : ranges) {
-    assert(range.getType() && "expected linalg.range type");
-    assert(range.getDefiningOp() && "need operations to extract range parts");
-    RangeOp rangeOp = cast<RangeOp>(range.getDefiningOp());
-    lbs.emplace_back(rangeOp.min());
-    ubs.emplace_back(rangeOp.max());
-    steps.emplace_back(rangeOp.step());
+  SmallVector<Value, 4> lbs, ubs, steps;
+  unpackRanges(ranges, lbs, ubs, steps);
+  SmallVector<int64_t, 4> constantSteps;
+  constantSteps.reserve(steps.size());
+  for (Value v : steps) {
+    auto op = v.getDefiningOp<ConstantIndexOp>();
+    assert(op && "Affine loops require constant steps");
+    constantSteps.push_back(op.getValue());
   }
-  builder = std::make_unique<AffineLoopNestBuilder>(ivs, lbs, ubs, steps);
+  builder =
+      std::make_unique<AffineLoopNestBuilder>(ivs, lbs, ubs, constantSteps);
 }
 
 template <>
 GenericLoopNestRangeBuilder<scf::ParallelOp>::GenericLoopNestRangeBuilder(
     MutableArrayRef<Value> ivs, ArrayRef<Value> ranges) {
   SmallVector<Value, 4> lbs, ubs, steps;
-  for (Value range : ranges) {
-    assert(range.getType() && "expected linalg.range type");
-    assert(range.getDefiningOp() && "need operations to extract range parts");
-    RangeOp rangeOp = cast<RangeOp>(range.getDefiningOp());
-    lbs.emplace_back(rangeOp.min());
-    ubs.emplace_back(rangeOp.max());
-    steps.emplace_back(rangeOp.step());
-  }
+  unpackRanges(ranges, lbs, ubs, steps);
   builder = std::make_unique<ParallelLoopNestBuilder>(ivs, lbs, ubs, steps);
 }
 

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
index 34fe059415ff..fbeac83d4305 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -290,6 +290,34 @@ bool mlir::linalg::isFusableInto(const LinalgDependenceGraph &graph,
   return true;
 }
 
+static bool isSameSubView(Value a, Value b) {
+  if (a == b)
+    return true;
+  auto sva = a.getDefiningOp<SubViewOp>();
+  auto svb = b.getDefiningOp<SubViewOp>();
+  if (!sva || !svb)
+    return false;
+  if (!isSameSubView(sva.getViewSource(), svb.getViewSource()))
+    return false;
+  if (sva.getType() != svb.getType())
+    return false;
+  if (sva.getRank() != svb.getRank())
+    return false;
+  if (sva.getNumOperands() != svb.getNumOperands())
+    return false;
+  if (sva.static_offsets() != svb.static_offsets())
+    return false;
+  if (sva.static_sizes() != svb.static_sizes())
+    return false;
+  if (sva.static_strides() != svb.static_strides())
+    return false;
+  /// Skip the "viewSource" operand.
+  for (unsigned idx = 1, e = sva.getNumOperands(); idx != e; ++idx)
+    if (sva.getOperand(idx) != svb.getOperand(idx))
+      return false;
+  return true;
+}
+
 static Optional<FusionInfo>
 fuseProducerOfDep(OpBuilder &b, LinalgOp consumer, unsigned consumerIdx,
                   const LinalgDependenceGraph &graph, OperationFolder *folder,
@@ -305,7 +333,7 @@ fuseProducerOfDep(OpBuilder &b, LinalgOp consumer, unsigned consumerIdx,
 
     // Check that the dependence is indeed on the input `consumerIdx` view.
     auto consumedView = dependence.indexingView;
-    if (consumer.getBuffer(consumerIdx) != consumedView)
+    if (!isSameSubView(consumer.getBuffer(consumerIdx), consumedView))
       continue;
 
     // Consumer consumes this view, `isStructurallyFusableProducer` also checks

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 4c460eaeaba0..2d875d4e95e4 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -38,8 +38,9 @@ using folded_affine_min = FoldedValueBuilder<AffineMinOp>;
 #define DEBUG_TYPE "linalg-tiling"
 
 static bool isZero(Value v) {
-  return isa_and_nonnull<ConstantIndexOp>(v.getDefiningOp()) &&
-         cast<ConstantIndexOp>(v.getDefiningOp()).getValue() == 0;
+  if (auto cst = v.getDefiningOp<ConstantIndexOp>())
+    return cst.getValue() == 0;
+  return false;
 }
 
 using LoopIndexToRangeIndexMap = DenseMap<int, int>;
@@ -55,11 +56,11 @@ using LoopIndexToRangeIndexMap = DenseMap<int, int>;
 // indices of newly created loops.
 static std::tuple<SmallVector<SubViewOp::Range, 4>, LoopIndexToRangeIndexMap>
 makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,
-                    ArrayRef<Value> allViewSizes, ArrayRef<Value> allTileSizes,
-                    OperationFolder *folder) {
+                    ArrayRef<Value> allViewSizes,
+                    ArrayRef<Value> allTileSizes) {
   assert(allTileSizes.size() == map.getNumResults());
   // Apply `map` to get view sizes in loop order.
-  auto viewSizes = applyMapToValues(b, loc, map, allViewSizes, folder);
+  auto viewSizes = applyMapToValues(b, loc, map, allViewSizes);
   SmallVector<Value, 4> tileSizes(allTileSizes.begin(), allTileSizes.end());
 
   // Traverse the tile sizes, which are in loop order, erase zeros everywhere.
@@ -76,10 +77,9 @@ makeTiledLoopRanges(OpBuilder &b, Location loc, AffineMap map,
 
   // Create a new range with the applied tile sizes.
   SmallVector<SubViewOp::Range, 4> res;
-  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
-    res.push_back(SubViewOp::Range{folded_std_constant_index(folder, 0),
-                                   viewSizes[idx], tileSizes[idx]});
-  }
+  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
+    res.push_back(SubViewOp::Range{std_constant_index(0), viewSizes[idx],
+                                   tileSizes[idx]});
   return std::make_tuple(res, loopIndexToRangeIndex);
 }
 
@@ -222,10 +222,11 @@ static bool isTiled(AffineMap map, ArrayRef<Value> tileSizes) {
   return false;
 }
 
-static SmallVector<Value, 4>
-makeTiledViews(OpBuilder &b, Location loc, LinalgOp linalgOp,
-               ArrayRef<Value> ivs, ArrayRef<Value> tileSizes,
-               ArrayRef<Value> viewSizes, OperationFolder *folder) {
+static SmallVector<Value, 4> makeTiledViews(OpBuilder &b, Location loc,
+                                            LinalgOp linalgOp,
+                                            ArrayRef<Value> ivs,
+                                            ArrayRef<Value> tileSizes,
+                                            ArrayRef<Value> viewSizes) {
   assert(linalgOp.hasBufferSemantics() &&
          "expected linalg op with buffer semantics");
   assert(ivs.size() == static_cast<size_t>(llvm::count_if(
@@ -240,8 +241,7 @@ makeTiledViews(OpBuilder &b, Location loc, LinalgOp linalgOp,
   SmallVector<Value, 8> lbs, subViewSizes;
   for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
     bool isTiled = !isZero(tileSizes[idx]);
-    lbs.push_back(isTiled ? ivs[idxIvs++]
-                          : (Value)folded_std_constant_index(folder, 0));
+    lbs.push_back(isTiled ? ivs[idxIvs++] : (Value)std_constant_index(0));
     subViewSizes.push_back(isTiled ? tileSizes[idx] : viewSizes[idx]);
   }
 
@@ -270,18 +270,18 @@ makeTiledViews(OpBuilder &b, Location loc, LinalgOp linalgOp,
     strides.reserve(rank);
     for (unsigned r = 0; r < rank; ++r) {
       if (!isTiled(map.getSubMap({r}), tileSizes)) {
-        offsets.push_back(folded_std_constant_index(folder, 0));
+        offsets.push_back(std_constant_index(0));
         sizes.push_back(std_dim(view, r));
-        strides.push_back(folded_std_constant_index(folder, 1));
+        strides.push_back(std_constant_index(1));
         continue;
       }
 
       // Tiling creates a new slice at the proper index, the slice step is 1
       // (i.e. the slice view does not subsample, stepping occurs in the loop).
       auto m = map.getSubMap({r});
-      auto offset = applyMapToValues(b, loc, m, lbs, folder).front();
+      auto offset = applyMapToValues(b, loc, m, lbs).front();
       offsets.push_back(offset);
-      auto size = applyMapToValues(b, loc, m, subViewSizes, folder).front();
+      auto size = applyMapToValues(b, loc, m, subViewSizes).front();
 
       // The size of the subview should be trimmed to avoid out-of-bounds
       // accesses, unless we statically know the subview size divides the view
@@ -297,64 +297,64 @@ makeTiledViews(OpBuilder &b, Location loc, LinalgOp linalgOp,
              getAffineDimExpr(/*position=*/1, b.getContext()) -
                  getAffineDimExpr(/*position=*/2, b.getContext())},
             b.getContext());
-        auto d = folded_std_dim(folder, view, r);
-        size = folded_affine_min(folder, b.getIndexType(), minMap,
-                                 ValueRange{size, d, offset});
+        auto d = std_dim(view, r);
+        size =
+            affine_min(b.getIndexType(), minMap, ValueRange{size, d, offset});
       }
 
       sizes.push_back(size);
-      strides.push_back(folded_std_constant_index(folder, 1));
+      strides.push_back(std_constant_index(1));
     }
 
     res.push_back(b.create<SubViewOp>(loc, view, offsets, sizes, strides));
   }
 
-  // Traverse the mins/maxes and erase those that don't have uses left.
-  // This is a special type of folding that we only apply when `folder` is
-  // defined.
-  if (folder)
-    for (auto v : llvm::concat<Value>(lbs, subViewSizes))
-      if (v.use_empty())
-        v.getDefiningOp()->erase();
-
   return res;
 }
 
 template <typename LoopTy>
 Optional<TiledLinalgOp> static tileLinalgOpImpl(
-    OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizes,
-    ArrayRef<unsigned> interchangeVector, OperationFolder *folder) {
+    OpBuilder &b, LinalgOp op, const LinalgTilingOptions &options) {
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPoint(op);
+  ScopedContext scope(b, op.getLoc());
+
   assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");
   // 1. Enforce the convention that "tiling by zero" skips tiling a particular
   // dimension. This convention is significantly simpler to handle instead of
   // adjusting affine maps to account for missing dimensions.
-  assert(op.getNumParallelLoops() + op.getNumReductionLoops() +
-                 op.getNumWindowLoops() ==
-             tileSizes.size() &&
-         "expected matching number of tile sizes and loops");
+  auto nLoops = op.getNumLoops();
+  SmallVector<Value, 4> tileSizeVector =
+      options.tileSizeComputationFunction(b, op);
+  if (tileSizeVector.size() < nLoops) {
+    auto zero = std_constant_index(0);
+    tileSizeVector.append(nLoops - tileSizeVector.size(), zero);
+  }
+
+  ArrayRef<Value> tileSizes = tileSizeVector;
+  // Initial tile sizes may be too big, only take the first nLoops.
+  tileSizes = tileSizes.take_front(nLoops);
+
+  if (llvm::all_of(tileSizes, isZero))
+    return llvm::None;
 
   if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
     // For conv op only support tiling along batch dimension (which is the first
     // loop).
-    if (convOp.padding() &&
-        !llvm::all_of(tileSizes.drop_front(),
-                      [](Value val) { return isZero(val); }))
+    if (convOp.padding() && !llvm::all_of(tileSizes.drop_front(), isZero))
       return llvm::None;
   }
 
   // If interchangeVector is empty, use the identity. Build the permutation map
   // otherwise.
-  auto invPermutationMap = AffineMap::getMultiDimIdentityMap(
-      tileSizes.size(), ScopedContext::getContext());
-  if (!interchangeVector.empty())
+  auto invPermutationMap =
+      AffineMap::getMultiDimIdentityMap(tileSizes.size(), b.getContext());
+  if (!options.interchangeVector.empty())
     invPermutationMap = inversePermutation(AffineMap::getPermutationMap(
-        interchangeVector, ScopedContext::getContext()));
+        options.interchangeVector, b.getContext()));
   if (!invPermutationMap)
     return llvm::None;
 
-  OpBuilder::InsertionGuard g(b);
-  b.setInsertionPoint(op);
-  ScopedContext scope(b, op.getLoc());
   // 2. Build the tiled loop ranges.
   auto viewSizes = getViewSizes(b, op);
   // The flattened loopToOperandRangesMaps is expected to be an invertible
@@ -368,22 +368,15 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
 
   SmallVector<SubViewOp::Range, 4> loopRanges;
   LoopIndexToRangeIndexMap loopIndexToRangeIndex;
-  std::tie(loopRanges, loopIndexToRangeIndex) =
-      makeTiledLoopRanges(b, scope.getLocation(), viewSizesToLoopsMap,
-                          viewSizes, tileSizes, folder);
-  if (!interchangeVector.empty())
-    applyPermutationToVector(loopRanges, interchangeVector);
+  std::tie(loopRanges, loopIndexToRangeIndex) = makeTiledLoopRanges(
+      b, scope.getLocation(), viewSizesToLoopsMap, viewSizes, tileSizes);
+  if (!options.interchangeVector.empty())
+    applyPermutationToVector(loopRanges, options.interchangeVector);
 
   // 3. Create the tiled loops.
   LinalgOp res = op;
   SmallVector<Value, 4> ivs(loopRanges.size());
-  // Convert SubViewOp::Range to linalg_range.
-  SmallVector<Value, 4> linalgRanges;
-  for (auto &range : loopRanges) {
-    linalgRanges.push_back(
-        linalg_range(range.offset, range.size, range.stride));
-  }
-  GenericLoopNestRangeBuilder<LoopTy>(ivs, linalgRanges)([&] {
+  GenericLoopNestRangeBuilder<LoopTy>(ivs, loopRanges)([&] {
     auto &b = ScopedContext::getBuilderRef();
     auto loc = ScopedContext::getLocation();
     SmallVector<Value, 4> ivValues(ivs.begin(), ivs.end());
@@ -393,11 +386,10 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
     // assuming that loopRanges have previously been permuted by
     // (i,j,k)->(k,i,j) So this permutation should be the inversePermutation of
     // that one: (d0,d1,d2)->(d2,d0,d1)
-    if (!interchangeVector.empty())
-      ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues, folder);
+    if (!options.interchangeVector.empty())
+      ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues);
 
-    auto views =
-        makeTiledViews(b, loc, op, ivValues, tileSizes, viewSizes, folder);
+    auto views = makeTiledViews(b, loc, op, ivValues, tileSizes, viewSizes);
     auto operands = getAssumedNonViewOperands(op);
     views.append(operands.begin(), operands.end());
     res = op.clone(b, loc, views);
@@ -413,102 +405,106 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
     loops.push_back(iv.cast<BlockArgument>().getOwner()->getParentOp());
     assert(loops.back() && "no owner found for induction variable!");
   }
-
   return TiledLinalgOp{res, loops};
 }
 
-template <typename LoopTy>
-static Optional<TiledLinalgOp>
-tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef<int64_t> tileSizes,
-                 ArrayRef<unsigned> interchangeVector,
-                 OperationFolder *folder) {
-  assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");
-  if (tileSizes.empty())
-    return llvm::None;
-
-  // The following uses the convention that "tiling by zero" skips tiling a
-  // particular dimension. This convention is significantly simpler to handle
-  // instead of adjusting affine maps to account for missing dimensions.
-  auto nLoops = op.getNumParallelLoops() + op.getNumReductionLoops() +
-                op.getNumWindowLoops();
-  tileSizes = tileSizes.take_front(nLoops);
-  // If only 0 tilings are left, then return.
-  if (llvm::all_of(tileSizes, [](int64_t v) { return v == 0; }))
-    return llvm::None;
-
-  if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
-    // For conv op only support tiling along batch dimension (which is the first
-    // loop).
-    if (convOp.padding() && !llvm::all_of(tileSizes.drop_front(),
-                                          [](int64_t val) { return val == 0; }))
-      return llvm::None;
-  }
+Optional<TiledLinalgOp>
+mlir::linalg::tileLinalgOp(OpBuilder &b, LinalgOp op,
+                           const LinalgTilingOptions &options) {
+  if (options.loopType == LinalgTilingLoopType::Loops)
+    return tileLinalgOpImpl<scf::ForOp>(b, op, options);
+  if (options.loopType == LinalgTilingLoopType::ParallelLoops)
+    return tileLinalgOpImpl<scf::ParallelOp>(b, op, options);
+  // TODO: Impl tiling to affine loops when it makes sense.
+  return llvm::None;
+}
 
-  // Create a builder for tile size constants.
-  OpBuilder::InsertionGuard g(b);
-  b.setInsertionPoint(op);
-  ScopedContext scope(b, op.getLoc());
+namespace {
+/// Helper classes for type list expansion.
+template <typename... OpTypes>
+class CanonicalizationPatternList;
+
+template <>
+class CanonicalizationPatternList<> {
+public:
+  static void insert(OwningRewritePatternList &patterns, MLIRContext *ctx) {}
+};
 
-  // Materialize concrete tile size values to pass the generic tiling function.
-  SmallVector<Value, 8> tileSizeValues;
-  tileSizeValues.reserve(tileSizes.size());
-  for (auto ts : tileSizes)
-    tileSizeValues.push_back(folded_std_constant_index(folder, ts));
-  // Pad tile sizes with zero values to enforce our convention.
-  if (tileSizeValues.size() < nLoops) {
-    for (unsigned i = tileSizeValues.size(); i < nLoops; ++i)
-      tileSizeValues.push_back(folded_std_constant_index(folder, 0));
+template <typename OpTy, typename... OpTypes>
+class CanonicalizationPatternList<OpTy, OpTypes...> {
+public:
+  static void insert(OwningRewritePatternList &patterns, MLIRContext *ctx) {
+    OpTy::getCanonicalizationPatterns(patterns, ctx);
+    CanonicalizationPatternList<OpTypes...>::insert(patterns, ctx);
   }
+};
 
-  return tileLinalgOpImpl<LoopTy>(b, op, tileSizeValues, interchangeVector,
-                                  folder);
-}
+/// Helper classes for type list expansion.
+template <typename... OpTypes>
+class RewritePatternList;
 
-Optional<TiledLinalgOp>
-mlir::linalg::tileLinalgOp(OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizes,
-                           ArrayRef<unsigned> interchangeVector,
-                           OperationFolder *folder) {
-  return tileLinalgOpImpl<scf::ForOp>(b, op, tileSizes, interchangeVector,
-                                      folder);
-}
+template <>
+class RewritePatternList<> {
+public:
+  static void insert(OwningRewritePatternList &patterns,
+                     const LinalgTilingOptions &options, MLIRContext *ctx) {}
+};
 
-Optional<TiledLinalgOp> mlir::linalg::tileLinalgOpToParallelLoops(
-    OpBuilder &b, LinalgOp op, ArrayRef<Value> tileSizes,
-    ArrayRef<unsigned> interchangeVector, OperationFolder *folder) {
-  return tileLinalgOpImpl<scf::ParallelOp>(b, op, tileSizes, interchangeVector,
-                                           folder);
-}
+template <typename OpTy, typename... OpTypes>
+class RewritePatternList<OpTy, OpTypes...> {
+public:
+  static void insert(OwningRewritePatternList &patterns,
+                     const LinalgTilingOptions &options, MLIRContext *ctx) {
+    patterns.insert<LinalgTilingPattern<OpTy>>(ctx, options,
+                                               LinalgMarker({}, "tiled"));
+    RewritePatternList<OpTypes...>::insert(patterns, options, ctx);
+  }
+};
+} // namespace
 
-Optional<TiledLinalgOp> mlir::linalg::tileLinalgOp(
-    OpBuilder &b, LinalgOp op, ArrayRef<int64_t> tileSizes,
-    ArrayRef<unsigned> interchangeVector, OperationFolder *folder) {
-  return tileLinalgOpImpl<scf::ForOp>(b, op, tileSizes, interchangeVector,
-                                      folder);
+OwningRewritePatternList
+mlir::linalg::getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx) {
+  OwningRewritePatternList patterns;
+  AffineApplyOp::getCanonicalizationPatterns(patterns, ctx);
+  AffineForOp::getCanonicalizationPatterns(patterns, ctx);
+  AffineMinOp::getCanonicalizationPatterns(patterns, ctx);
+  AffineMaxOp::getCanonicalizationPatterns(patterns, ctx);
+  scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
+  scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
+  ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
+  SubViewOp::getCanonicalizationPatterns(patterns, ctx);
+  ViewOp::getCanonicalizationPatterns(patterns, ctx);
+  CanonicalizationPatternList<
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
+      >::insert(patterns, ctx);
+  return patterns;
 }
 
-Optional<TiledLinalgOp> mlir::linalg::tileLinalgOpToParallelLoops(
-    OpBuilder &b, LinalgOp op, ArrayRef<int64_t> tileSizes,
-    ArrayRef<unsigned> interchangeVector, OperationFolder *folder) {
-  return tileLinalgOpImpl<scf::ParallelOp>(b, op, tileSizes, interchangeVector,
-                                           folder);
+/// Populate the given list with patterns that apply Linalg tiling.
+static void insertTilingPatterns(OwningRewritePatternList &patterns,
+                                 const LinalgTilingOptions &options,
+                                 MLIRContext *ctx) {
+  RewritePatternList<
+#define GET_OP_LIST
+#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
+      >::insert(patterns, options, ctx);
 }
 
-template <typename LoopTy>
-static void tileLinalgOps(FuncOp f, ArrayRef<int64_t> tileSizes) {
-  OpBuilder b(f);
-  OperationFolder folder(f.getContext());
-  f.walk([tileSizes, &b, &folder](LinalgOp op) {
-    if (!op.hasBufferSemantics())
-      return;
-    auto opLoopsPair = tileLinalgOpImpl<LoopTy>(
-        b, op, tileSizes, /*interchangeVector=*/{}, &folder);
-    // If tiling occurred successfully, erase old op.
-    if (opLoopsPair)
-      op.erase();
-  });
-  f.walk([](LinalgOp op) {
-    if (isOpTriviallyDead(op))
-      op.erase();
+static void applyTilingToLoopPatterns(LinalgTilingLoopType loopType,
+                                      FuncOp funcOp,
+                                      ArrayRef<int64_t> tileSizes) {
+  auto options =
+      LinalgTilingOptions().setTileSizes(tileSizes).setLoopType(loopType);
+  MLIRContext *ctx = funcOp.getContext();
+  OwningRewritePatternList patterns;
+  insertTilingPatterns(patterns, options, ctx);
+  applyPatternsAndFoldGreedily(funcOp, patterns);
+  applyPatternsAndFoldGreedily(funcOp,
+                               getLinalgTilingCanonicalizationPatterns(ctx));
+  // Drop the marker.
+  funcOp.walk([](LinalgOp op) {
+    op.removeAttr(LinalgTransforms::kLinalgTransformMarker);
   });
 }
 
@@ -518,7 +514,8 @@ struct LinalgTilingPass : public LinalgTilingBase<LinalgTilingPass> {
   LinalgTilingPass(ArrayRef<int64_t> sizes) { tileSizes = sizes; }
 
   void runOnFunction() override {
-    tileLinalgOps<scf::ForOp>(getFunction(), tileSizes);
+    applyTilingToLoopPatterns(LinalgTilingLoopType::Loops, getFunction(),
+                              tileSizes);
   }
 };
 
@@ -530,7 +527,8 @@ struct LinalgTilingToParallelLoopsPass
   }
 
   void runOnFunction() override {
-    tileLinalgOps<scf::ParallelOp>(getFunction(), tileSizes);
+    applyTilingToLoopPatterns(LinalgTilingLoopType::ParallelLoops,
+                              getFunction(), tileSizes);
   }
 };
 

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 96d97809a9d3..2ce949aa034c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -98,6 +98,21 @@ void mlir::linalg::LinalgMarker::replaceLinalgMarker(PatternRewriter &rewriter,
                                    rewriter.getContext()));
 }
 
+LinalgTilingOptions &
+mlir::linalg::LinalgTilingOptions::setTileSizes(ArrayRef<int64_t> ts) {
+  SmallVector<int64_t, 4> tileSizes(ts.begin(), ts.end());
+  tileSizeComputationFunction = [tileSizes](OpBuilder &b, Operation *op) {
+    OpBuilder::InsertionGuard guard(b);
+    b.setInsertionPointToStart(
+        &op->getParentOfType<FuncOp>().getBody().front());
+    return llvm::to_vector<4>(llvm::map_range(tileSizes, [&](int64_t s) {
+      Value v = b.create<ConstantIndexOp>(op->getLoc(), s);
+      return v;
+    }));
+  };
+  return *this;
+};
+
 /// Linalg base tiling pattern.
 mlir::linalg::LinalgBaseTilingPattern::LinalgBaseTilingPattern(
     StringRef opName, MLIRContext *context, LinalgTilingOptions options,
@@ -112,14 +127,7 @@ LogicalResult mlir::linalg::LinalgBaseTilingPattern::matchAndRewrite(
     return failure();
   if (failed(marker.checkAndNotify(rewriter, linalgOp)))
     return failure();
-  Optional<TiledLinalgOp> res;
-  if (options.loopType == LinalgTilingLoopType::Loops)
-    res = tileLinalgOp(rewriter, linalgOp, options.tileSizes,
-                       options.interchangeVector);
-  else if (options.loopType == LinalgTilingLoopType::ParallelLoops)
-    res = tileLinalgOpToParallelLoops(rewriter, linalgOp, options.tileSizes,
-                                      options.interchangeVector);
-  // TODO: Impl tiling to affine loops when it makes sense.
+  Optional<TiledLinalgOp> res = tileLinalgOp(rewriter, linalgOp, options);
 
   if (!res)
     return failure();

diff  --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
index 47fac4c8bf4b..41dd7796c7ab 100644
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ b/mlir/test/Dialect/Linalg/tile.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-2
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-02
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,2" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-002
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" -mlir-disable-threading=true | FileCheck %s -check-prefix=TILE-234
 
 //   TILE-2-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
 //  TILE-02-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
@@ -13,26 +13,16 @@
 // TILE-002-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
 // TILE-234-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
 
-//   TILE-2-DAG: #[[bound_map:.*]] = affine_map<(d0, d1, d2) -> (2, d1 - d2)>
-//  TILE-02-DAG: #[[bound_map:.*]] = affine_map<(d0, d1, d2) -> (2, d1 - d2)>
-// TILE-002-DAG: #[[bound_map:.*]] = affine_map<(d0, d1, d2) -> (2, d1 - d2)>
-// TILE-234-DAG: #[[bound_map_2:.*]] = affine_map<(d0, d1, d2) -> (2, d1 - d2)>
-// TILE-234-DAG: #[[bound_map_3:.*]] = affine_map<(d0, d1, d2) -> (3, d1 - d2)>
-// TILE-234-DAG: #[[bound_map_4:.*]] = affine_map<(d0, d1, d2) -> (4, d1 - d2)>
+//   TILE-2-DAG: #[[bound_map:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
+//  TILE-02-DAG: #[[bound_map:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
+// TILE-002-DAG: #[[bound_map:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
+// TILE-234-DAG: #[[bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
+// TILE-234-DAG: #[[bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
+// TILE-234-DAG: #[[bound_map_4:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
 
-//   TILE-2-DAG: #[[strided1D_dynamic:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-//   TILE-02-DAG: #[[strided1D_dynamic:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-//   T_ILE-002-DAG: #[[strided1D_dynamic:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-//   TILE-234-DAG: #[[strided1D_dynamic:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-//   TILE-2-DAG: #[[strided2D_dynamic:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
-//   TILE-02-DAG: #[[strided2D_dynamic:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
-//   TILE-002-DAG: #[[strided2D_dynamic:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
-//   TILE-234-DAG: #[[strided2D_dynamic:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
-
-//   REACTIVATE_ME_TILE-2-DAG: #[[stride_99_1_layout_map:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 99 + s0 + d1)>
-//  REACTIVATE_ME_TILE-02-DAG: #[[stride_99_1_layout_map:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 99 + s0 + d1)>
-// REACTIVATE_ME_TILE-234-DAG: #[[stride_99_1_layout_map:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 99 + s0 + d1)>
+//   TILE-2-DAG: #[[stride_99_1_layout_map:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 99 + s0 + d1)>
+//  TILE-02-DAG: #[[stride_99_1_layout_map:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 99 + s0 + d1)>
+// TILE-234-DAG: #[[stride_99_1_layout_map:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 99 + s0 + d1)>
 
 func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg2: memref<?x?xf32, offset: ?, strides: [?, 1]>) {
   linalg.matmul(%arg0, %arg1, %arg2) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?x?xf32, offset: ?, strides: [?, 1]>
@@ -40,55 +30,51 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 }
 // TILE-2-LABEL: func @matmul(
 //       TILE-2-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
+//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[I]])[%[[localM]]]
 //       TILE-2:   %[[K:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[szM]], %[[K]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-2:   %[[localK:.*]] = dim %{{.*}}, 0
-//       TILE-2:   %[[szK:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localK]], %[[I]])
+//       TILE-2:   %[[szK:.*]] = affine.min #[[bound_map]](%[[I]])[%[[localK]]]
 //       TILE-2:   %[[N:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-2:   %[[sCi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[szK]], %[[N]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-2:   linalg.matmul(%[[sAi]], %{{.*}}, %[[sCi]]) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-2:   %[[sCi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
+//       TILE-2:   linalg.matmul(%[[sAi]], %{{.*}}, %[[sCi]]) : memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>
 
 // TILE-02-LABEL: func @matmul(
 //       TILE-02-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-02-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-02-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-02: %[[N:.*]] = dim %arg1, 1 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
 //       TILE-02:   %[[K:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[localN:.*]] = dim %{{.*}}, 1
-//       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
-//       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[K]], %[[szN]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[J]])[%[[localN]]]
+//       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[localK:.*]] = dim %{{.*}}, 1
-//       TILE-02:   %[[szK:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localK]], %[[J]])
-//       TILE-02:   %[[sCj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[M]], %[[szK]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-02:   linalg.matmul(%{{.*}}, %[[sBj]], %[[sCj]]) : memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-02:   %[[szK:.*]] = affine.min #[[bound_map]](%[[J]])[%[[localK]]]
+//       TILE-02:   %[[sCj:.*]] = subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
+//       TILE-02:   linalg.matmul(%{{.*}}, %[[sBj]], %[[sCj]]) : memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>
 
 // TILE-002-LABEL: func @matmul(
 //       TILE-002-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-002-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-002-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-002: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-002:   %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-002:   %[[localK:.*]] = dim %{{.*}}, 1
-//       TILE-002:   %[[szK:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localK]], %[[K]])
-//       TILE-002:   %[[sAj:.*]] = subview %{{.*}}[%[[C0]], %[[K]]] [%[[M]], %[[szK]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-002:   %[[szK:.*]] = affine.min #[[bound_map]](%[[K]])[%[[localK]]]
+//       TILE-002:   %[[sAj:.*]] = subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-002:   %[[localK:.*]] = dim %{{.*}}, 0
-//       TILE-002:   %[[szK:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localK]], %[[K]])
+//       TILE-002:   %[[szK:.*]] = affine.min #[[bound_map]](%[[K]])[%[[localK]]]
 //       TILE-002:   %[[N:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-002:   %[[sBj:.*]] = subview %{{.*}}[%[[K]], %[[C0]]] [%[[szK]], %[[N]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-002:   linalg.matmul(%[[sAj]], %[[sBj]], %{{.*}}) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D]]>
+//       TILE-002:   %[[sBj:.*]] = subview %{{.*}}[%[[K]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
+//       TILE-002:   linalg.matmul(%[[sAj]], %[[sBj]], %{{.*}}) : memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>
 
 // TILE-234-LABEL: func @matmul(
 //       TILE-234-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-234-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-234-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-234-DAG: %[[C3:.*]] = constant 3 : index
 //       TILE-234-DAG: %[[C4:.*]] = constant 4 : index
@@ -99,22 +85,22 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
 //       TILE-234:      scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-234:        %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-234:        %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
+//       TILE-234:        %[[szM:.*]] = affine.min #[[bound_map_2]](%[[I]])[%[[localM]]]
 //       TILE-234:        %[[localK:.*]] = dim %{{.*}}, 1
-//       TILE-234:        %[[szK:.*]] = affine.min #[[bound_map_4]](%[[C4]], %[[localK]], %[[K]])
-//       TILE-234:        %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234:        %[[szK:.*]] = affine.min #[[bound_map_4]](%[[K]])[%[[localK]]]
+//       TILE-234:        %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-234:        %[[localK:.*]] = dim %{{.*}}, 0
-//       TILE-234:        %[[szK:.*]] = affine.min #[[bound_map_4]](%[[C4]], %[[localK]], %[[K]])
+//       TILE-234:        %[[szK:.*]] = affine.min #[[bound_map_4]](%[[K]])[%[[localK]]]
 //       TILE-234:        %[[localN:.*]] = dim %{{.*}}, 1
-//       TILE-234:        %[[szN:.*]] = affine.min #[[bound_map_3]](%[[C3]], %[[localN]], %[[J]])
-//       TILE-234:        %[[sBkj:.*]] = subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234:        %[[szN:.*]] = affine.min #[[bound_map_3]](%[[J]])[%[[localN]]]
+//       TILE-234:        %[[sBkj:.*]] = subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-234:        %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-234:        %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
+//       TILE-234:        %[[szM:.*]] = affine.min #[[bound_map_2]](%[[I]])[%[[localM]]]
 //       TILE-234:        %[[localN:.*]] = dim %{{.*}}, 1
-//       TILE-234:        %[[szN:.*]] = affine.min #[[bound_map_3]](%[[C3]], %[[localN]], %[[J]])
-//       TILE-234:        %[[sCij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234:        %[[szN:.*]] = affine.min #[[bound_map_3]](%[[J]])[%[[localN]]]
+//       TILE-234:        %[[sCij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //
-//       TILE-234:        linalg.matmul(%[[sAik]], %[[sBkj]], %[[sCij]]) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234:        linalg.matmul(%[[sAik]], %[[sBkj]], %[[sCij]]) : memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>
 
 // When the buffer shapes are known at compile time, it is possible to avoid
 // the "min" in subview size computation. This test uses buffer sizes divisible
@@ -125,106 +111,107 @@ func @matmul_static(%arg0: memref<10x16xf32, offset: ?, strides: [?, 1]>, %arg1:
   return
 }
 // TILE-2-LABEL: func @matmul_static(
+//  TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
+//  TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
+//  TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
 //       TILE-2-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
-//       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-//       TILE-2:   %[[K:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[C2]], %[[K]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-2:   %[[N:.*]] = dim %{{.*}}, 1 : memref<10x12xf32, #[[strided2D]]>
-//       TILE-2:   %[[sCi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[C2]], %[[N]]] [%[[C1]], %[[C1]]] : memref<10x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-2-DAG: %[[M:.*]] = constant 10 : index
+//       TILE-2: scf.for %[[I:.*]] = %{{.*}} to %[[M]] step %{{.*}} {
+//       TILE-2:   %[[MIN2:.*]] = affine.min #map2(%[[I]])
+//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[MIN2]], 16] [1, 1] : memref<10x16xf32, #[[strided2D]]> to memref<?x16xf32, #[[strided2D]]>
+//       TILE-2:   %[[MIN22:.*]] = affine.min #map2(%[[I]])
+//       TILE-2:   %[[sCi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[MIN22]], 12] [1, 1] : memref<10x12xf32, #[[strided2D]]> to memref<?x12xf32, #[[strided2D]]>
 //       TILE-2:   linalg.matmul(%[[sAi]], %{{.*}}, %[[sCi]])
 
 // TILE-02-LABEL: func @matmul_static(
 //       TILE-02-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-02-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-02-DAG: %[[C2:.*]] = constant 2 : index
-//       TILE-02: %[[N:.*]] = dim %arg1, 1 : memref<16x12xf32, #[[strided2D]]>
+//       TILE-02-DAG: %[[N:.*]] = constant 12 : index
 //       TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
-//       TILE-02:   %[[K:.*]] = dim %{{.*}}, 0 : memref<16x12xf32, #[[strided2D]]>
-//   TILE-02-NOT:   affine.min
-//       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[K]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<16x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-02:   %[[M:.*]] = dim %{{.*}}, 0 : memref<10x12xf32, #[[strided2D]]>
-//   TILE-02-NOT:   affine.min
-//       TILE-02:   %[[sCj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[M]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<10x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-02:   linalg.matmul(%{{.*}}, %[[sBj]], %[[sCj]]) : memref<10x16xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-02:   %[[MIN2:.*]] = affine.min #map2(%[[J]])
+//       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[0, %[[J]]] [16, %[[MIN2]]] [1, 1] : memref<16x12xf32, #[[strided2D]]> to memref<16x?xf32, #[[strided2D]]>
+//       TILE-02:   %[[MIN22:.*]] = affine.min #map2(%[[J]])
+//       TILE-02:   %[[sCj:.*]] = subview %{{.*}}[0, %[[J]]] [10, %[[MIN22]]] [1, 1] : memref<10x12xf32, #[[strided2D]]> to memref<10x?xf32, #[[strided2D]]>
+//       TILE-02:   linalg.matmul(%{{.*}}, %[[sBj]], %[[sCj]]) : memref<10x16xf32, #[[strided2D]]>, memref<16x?xf32, #[[strided2D]]>, memref<10x?xf32, #[[strided2D]]>
 
 // TILE-002-LABEL: func @matmul_static(
 //       TILE-002-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-002-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-002-DAG: %[[C2:.*]] = constant 2 : index
-//       TILE-002: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-//       TILE-002:   %[[M:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
-//   TILE-002-NOT:   affine.min
-//       TILE-002:   %[[sAj:.*]] = subview %{{.*}}[%[[C0]], %[[K]]] [%[[M]], %[[C2]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-002:   %[[N:.*]] = dim %{{.*}}, 1 : memref<16x12xf32, #[[strided2D]]>
-//   TILE-002-NOT:   affine.min
-//       TILE-002:   %[[sBj:.*]] = subview %{{.*}}[%[[K]], %[[C0]]] [%[[C2]], %[[N]]] [%[[C1]], %[[C1]]] : memref<16x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//       TILE-002:   linalg.matmul(%[[sAj]], %[[sBj]], %{{.*}}) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>, memref<10x12xf32, #[[strided2D]]>
+//       TILE-002-DAG: %[[C16:.*]] = constant 16 : index
+//       TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} {
+//       TILE-002:   %[[MIN2:.*]] = affine.min #map2(%[[K]])
+//       TILE-002:   %[[sAj:.*]] = subview %{{.*}}[0, %[[K]]] [10, %[[MIN2]]] [1, 1] : memref<10x16xf32, #[[strided2D]]> to memref<10x?xf32, #[[strided2D]]>
+//       TILE-002:   %[[MIN22:.*]] = affine.min #map2(%[[K]])
+//       TILE-002:   %[[sBj:.*]] = subview %{{.*}}[%[[K]], 0] [%[[MIN22]], 12] [1, 1] : memref<16x12xf32, #[[strided2D]]> to memref<?x12xf32, #[[strided2D]]>
+//       TILE-002:   linalg.matmul(%[[sAj]], %[[sBj]], %{{.*}}) : memref<10x?xf32, #[[strided2D]]>, memref<?x12xf32, #[[strided2D]]>, memref<10x12xf32, #[[strided2D]]>
 
 // TILE-234-LABEL: func @matmul_static(
 //       TILE-234-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-234-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-234-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-234-DAG: %[[C3:.*]] = constant 3 : index
 //       TILE-234-DAG: %[[C4:.*]] = constant 4 : index
-//       TILE-234: %[[ubM:.*]] = dim %{{.*}}, 0 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-234: %[[ubK:.*]] = dim %{{.*}}, 1 : memref<10x16xf32, #[[strided2D]]>
-//       TILE-234: %[[ubN:.*]] = dim %{{.*}}, 1 : memref<16x12xf32, #[[strided2D]]>
-//       TILE-234:  scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
-//       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
-//       TILE-234:      scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-//   TILE-234-NOT:   affine.min
-//       TILE-234:        %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%[[C2]], %[[C4]]] [%[[C1]], %[[C1]]] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//   TILE-234-NOT:   affine.min
-//       TILE-234:        %[[sBkj:.*]] = subview %{{.*}}[%[[K]], %[[J]]] [%[[C4]], %[[C3]]] [%[[C1]], %[[C1]]] : memref<16x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
-//   TILE-234-NOT:   affine.min
-//       TILE-234:        %[[sCij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[C2]], %[[C3]]] [%[[C1]], %[[C1]]] : memref<10x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234-DAG: %[[C10:.*]] = constant 10 : index
+//       TILE-234-DAG: %[[C16:.*]] = constant 16 : index
+//       TILE-234-DAG: %[[C12:.*]] = constant 12 : index
+//       TILE-234:  scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[C10]] step %{{.*}} {
+//       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[C12]] step %{{.*}} {
+//       TILE-234:      scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[C16]] step %{{.*}} {
+//       TILE-234:        %[[sAik:.*]] = subview %{{.*}}[%[[I]], %[[K]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<10x16xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
+//       TILE-234:        %[[sBkj:.*]] = subview %{{.*}}[%[[K]], %[[J]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<16x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
+//       TILE-234:        %[[sCij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%{{.*}}, %{{.*}}] [1, 1] : memref<10x12xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //
-//       TILE-234:        linalg.matmul(%[[sAik]], %[[sBkj]], %[[sCij]]) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234:        linalg.matmul(%[[sAik]], %[[sBkj]], %[[sCij]]) : memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>, memref<?x?xf32, #[[strided2D]]>
 
 func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>, %arg2: memref<?xf32, offset: ?, strides: [1]>) {
   linalg.matvec(%arg0, %arg1, %arg2) : memref<?x?xf32, offset: ?, strides: [?, 1]>, memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>
   return
 }
 // TILE-2-LABEL: func @matvec(
+//  TILE-2-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
+//  TILE-2-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
+//  TILE-2-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
 //       TILE-2-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-//       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
+//       TILE-2:   %[[localM:.*]] = dim %[[ARG0]], 0
+//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[I]])[%[[localM]]]
 //       TILE-2:   %[[N:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
-//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], %[[C0]]] [%[[szM]], %[[N]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-2:   %[[localN:.*]] = dim %{{.*}}, 0
-//       TILE-2:   %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[I]])
-//       TILE-2:   %[[sCi:.*]] = subview %{{.*}}[%[[I]]] [%[[szN]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
-//       TILE-2:   linalg.matvec(%[[sAi]], %{{.*}}, %[[sCi]]) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?xf32, #[[strided1D]]>, memref<?xf32, #[[strided1D_dynamic]]>
+//       TILE-2:   %[[szN:.*]] = affine.min #[[bound_map]](%[[I]])[%[[localN]]]
+//       TILE-2:   %[[sCi:.*]] = subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
+//       TILE-2:   linalg.matvec(%[[sAi]], %{{.*}}, %[[sCi]]) : memref<?x?xf32, #[[strided2D]]>, memref<?xf32, #[[strided1D]]>, memref<?xf32, #[[strided1D]]>
 
 // TILE-02-LABEL: func @matvec(
+// TILE-02-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
+// TILE-02-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
+// TILE-02-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
 //       TILE-02-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-02-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-02-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-02: %[[K:.*]] = dim %{{.*}}, 1 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02: scf.for %[[J]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
 //       TILE-02:   %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[localN:.*]] = dim %{{.*}}, 1
-//       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
-//       TILE-02:   %[[sAj:.*]] = subview %{{.*}}[%[[C0]], %[[J]]] [%[[M]], %[[szN]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[J]])[%[[localN]]]
+//       TILE-02:   %[[sAj:.*]] = subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-02:   %[[localN:.*]] = dim %{{.*}}, 0
-//       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localN]], %[[J]])
-//       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[%[[J]]] [%[[szN]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
-//       TILE-02:   linalg.matvec(%[[sAj]], %[[sBj]], %{{.*}}) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>, memref<?xf32, #[[strided1D]]>
+//       TILE-02:   %[[szN:.*]] = affine.min #[[bound_map]](%[[J]])[%[[localN]]]
+//       TILE-02:   %[[sBj:.*]] = subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
+//       TILE-02:   linalg.matvec(%[[sAj]], %[[sBj]], %{{.*}}) : memref<?x?xf32, #[[strided2D]]>, memref<?xf32, #[[strided1D]]>, memref<?xf32, #[[strided1D]]>
 
 // TILE-002-LABEL: func @matvec(
+// TILE-002-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
+// TILE-002-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
+// TILE-002-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
 //   TILE-002-NOT: scf.for
 
 // TILE-234-LABEL: func @matvec(
+// TILE-234-SAME: %[[ARG0:[0-9a-zA-Z]*]]: memref
+// TILE-234-SAME: %[[ARG1:[0-9a-zA-Z]*]]: memref
+// TILE-234-SAME: %[[ARG2:[0-9a-zA-Z]*]]: memref
 //       TILE-234-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-234-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-234-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-234-DAG: %[[C3:.*]] = constant 3 : index
 //       TILE-234: %[[M:.*]] = dim %{{.*}}, 0 : memref<?x?xf32, #[[strided2D]]>
@@ -232,18 +219,18 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
 //       TILE-234:  scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-234:    scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
 //       TILE-234:      %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-234:      %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
+//       TILE-234:      %[[szM:.*]] = affine.min #[[bound_map_2]](%[[I]])[%[[localM]]]
 //       TILE-234:      %[[localN:.*]] = dim %{{.*}}, 1
-//       TILE-234:      %[[szN:.*]] = affine.min #[[bound_map_3]](%[[C3]], %[[localN]], %[[J]])
-//       TILE-234:      %[[sAij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [%[[C1]], %[[C1]]] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D_dynamic]]>
+//       TILE-234:      %[[szN:.*]] = affine.min #[[bound_map_3]](%[[J]])[%[[localN]]]
+//       TILE-234:      %[[sAij:.*]] = subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[strided2D]]> to memref<?x?xf32, #[[strided2D]]>
 //       TILE-234:      %[[localN:.*]] = dim %{{.*}}, 0
-//       TILE-234:      %[[szN:.*]] = affine.min #[[bound_map_3]](%[[C3]], %[[localN]], %[[J]])
-//       TILE-234:      %[[sBj:.*]] = subview %{{.*}}[%[[J]]] [%[[szN]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
+//       TILE-234:      %[[szN:.*]] = affine.min #[[bound_map_3]](%[[J]])[%[[localN]]]
+//       TILE-234:      %[[sBj:.*]] = subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
 //       TILE-234:      %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-234:      %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
-//       TILE-234:      %[[sCi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
+//       TILE-234:      %[[szM:.*]] = affine.min #[[bound_map_2]](%[[I]])[%[[localM]]]
+//       TILE-234:      %[[sCi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
 //
-//       TILE-234:      linalg.matvec(%[[sAij]], %[[sBj]], %[[sCi]]) : memref<?x?xf32, #[[strided2D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>
+//       TILE-234:      linalg.matvec(%[[sAij]], %[[sBj]], %[[sCi]]) : memref<?x?xf32, #[[strided2D]]>, memref<?xf32, #[[strided1D]]>, memref<?xf32, #[[strided1D]]>
 
 func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, offset: ?, strides: [1]>, %arg2: memref<f32>) {
   linalg.dot(%arg0, %arg1, %arg2) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>, memref<f32>
@@ -251,17 +238,16 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
 }
 // TILE-2-LABEL: func @dot(
 //       TILE-2-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-2-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-2-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-2: %[[M:.*]] = dim %{{.*}}, 0 : memref<?xf32, #[[strided1D]]>
 //       TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 //       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
-//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
+//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[I]])[%[[localM]]]
+//       TILE-2:   %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
 //       TILE-2:   %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[C2]], %[[localM]], %[[I]])
-//       TILE-2:   %[[sBi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
-//       TILE-2:   linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : memref<?xf32, #[[strided1D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>, memref<f32>
+//       TILE-2:   %[[szM:.*]] = affine.min #[[bound_map]](%[[I]])[%[[localM]]]
+//       TILE-2:   %[[sBi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
+//       TILE-2:   linalg.dot(%[[sAi]], %[[sBi]], {{.*}}) : memref<?xf32, #[[strided1D]]>, memref<?xf32, #[[strided1D]]>, memref<f32>
 
 // TILE-02-LABEL: func @dot(
 //   TILE-02-NOT: scf.for
@@ -271,17 +257,16 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
 
 // TILE-234-LABEL: func @dot(
 //       TILE-234-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-234-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-234-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-234:  %[[ubK:.*]] = dim %{{.*}}, 0 : memref<?xf32, #[[strided1D]]>
 //       TILE-234:  scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
 //       TILE-234:    %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-234:    %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
-//       TILE-234:    %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
+//       TILE-234:    %[[szM:.*]] = affine.min #[[bound_map_2]](%[[I]])[%[[localM]]]
+//       TILE-234:    %[[sAi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
 //       TILE-234:    %[[localM:.*]] = dim %{{.*}}, 0
-//       TILE-234:    %[[szM:.*]] = affine.min #[[bound_map_2]](%[[C2]], %[[localM]], %[[I]])
-//       TILE-234:    %[[sBi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [%[[C1]]] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D_dynamic]]>
-//       TILE-234:    linalg.dot(%[[sAi]], %[[sBi]], %{{.*}}) : memref<?xf32, #[[strided1D_dynamic]]>, memref<?xf32, #[[strided1D_dynamic]]>, memref<f32>
+//       TILE-234:    %[[szM:.*]] = affine.min #[[bound_map_2]](%[[I]])[%[[localM]]]
+//       TILE-234:    %[[sBi:.*]] = subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[strided1D]]> to memref<?xf32, #[[strided1D]]>
+//       TILE-234:    linalg.dot(%[[sAi]], %[[sBi]], %{{.*}}) : memref<?xf32, #[[strided1D]]>, memref<?xf32, #[[strided1D]]>, memref<f32>
 
 func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
   linalg.fill(%arg0, %arg1) : memref<127x99xf32>, f32
@@ -291,13 +276,13 @@ func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
 //       TILE-2:   for
 //   TILE-2-NOT:   for
 //       TILE-2:       subview{{.*}} : memref<127x99xf32>
-//       TILE-2:       linalg.fill{{.*}} : memref<?x?xf32, #[[strided2D_dynamic]]>, f32
+//       TILE-2:       linalg.fill{{.*}} : memref<?x99xf32, #[[stride_99_1_layout_map]]>, f32
 
 // TILE-02-LABEL: func @fill_static
 //       TILE-02:   for
 //   TILE-02-NOT:   for
 //       TILE-02:       subview{{.*}} : memref<127x99xf32>
-//       TILE-02:       linalg.fill{{.*}} : memref<?x?xf32, #[[strided2D_dynamic]]>, f32
+//       TILE-02:       linalg.fill{{.*}} : memref<127x?xf32, #[[stride_99_1_layout_map]]>, f32
 
 // TILE-002-LABEL: func @fill_static
 //   TILE-002-NOT:   for
@@ -308,7 +293,7 @@ func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
 //       TILE-234:     for
 //   TILE-234-NOT:   for
 //       TILE-234:       subview{{.*}} : memref<127x99xf32>
-//       TILE-234:       linalg.fill{{.*}} : memref<?x?xf32, #[[strided2D_dynamic]]>, f32
+//       TILE-234:       linalg.fill{{.*}} : memref<?x?xf32, #[[stride_99_1_layout_map]]>, f32
 
 
 func @fill(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: f32) {

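(Illustration, not part of the commit: the updated checks above describe partial
tile sizes computed as an affine.min over the loop IV with the full size passed
as a symbol, and subviews whose offsets and strides fold to static 0s and 1s.
The sketch below is hand-written after the @dot TILE-2 checks; the function
name and wrapper are invented for illustration and are not actual pass output.)

  #bound_map = affine_map<(d0)[s0] -> (2, -d0 + s0)>

  func @dot_tiled(%A: memref<?xf32, offset: ?, strides: [1]>,
                  %B: memref<?xf32, offset: ?, strides: [1]>,
                  %C: memref<f32>) {
    %c0 = constant 0 : index
    %c2 = constant 2 : index
    %M = dim %A, 0 : memref<?xf32, offset: ?, strides: [1]>
    scf.for %i = %c0 to %M step %c2 {
      // Partial tile extent: min(2, M - i). The IV is a dim, the size a symbol.
      %szM = affine.min #bound_map(%i)[%M]
      // Offsets and strides are static attributes now, not materialized constants.
      %sAi = subview %A[%i] [%szM] [1] : memref<?xf32, offset: ?, strides: [1]> to memref<?xf32, offset: ?, strides: [1]>
      %sBi = subview %B[%i] [%szM] [1] : memref<?xf32, offset: ?, strides: [1]> to memref<?xf32, offset: ?, strides: [1]>
      linalg.dot(%sAi, %sBi, %C) : memref<?xf32, offset: ?, strides: [1]>, memref<?xf32, offset: ?, strides: [1]>, memref<f32>
    }
    return
  }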
diff --git a/mlir/test/Dialect/Linalg/tile_conv.mlir b/mlir/test/Dialect/Linalg/tile_conv.mlir
index e1f03fa9cf49..4b7698cb9b5a 100644
--- a/mlir/test/Dialect/Linalg/tile_conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv.mlir
@@ -1,10 +1,9 @@
 // RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
 
 // TILE-23004-DAG: #[[D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
-// TILE-23004-DAG: #[[S0x10p90:.*]] = affine_map<()[s0] -> (s0 * 10 + 90)>
+// TILE-23004-DAG: #[[S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 90, d0 * -30 + s1)>
 // TILE-23004-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
-// TILE-23004-DAG: #[[strided4D_dynamic:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>
-// TILE-23004-DAG: #[[bound_map_4:.*]] = affine_map<(d0, d1, d2) -> (4, d1 - d2)>
+// TILE-23004-DAG: #[[bound_map_4:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
 
 func @conv(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) {
   linalg.conv(%arg0, %arg1, %arg2) {dilations = [10, 20], strides = [30, 40]} : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
@@ -13,7 +12,6 @@ func @conv(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1:
 // TILE-23004-LABEL: func @conv(
 //       TILE-23004:   %{{.*}}: memref<?x?x?x?xf32, #[[strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[strided4D]]>, %{{.*}}: memref<?x?x?x?xf32, #[[strided4D]]>) {
 //       TILE-23004-DAG: %[[C0:.*]] = constant 0 : index
-//       TILE-23004-DAG: %[[C1:.*]] = constant 1 : index
 //       TILE-23004-DAG: %[[C2:.*]] = constant 2 : index
 //       TILE-23004-DAG: %[[C3:.*]] = constant 3 : index
 //       TILE-23004-DAG: %[[C4:.*]] = constant 4 : index
@@ -27,19 +25,20 @@ func @conv(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1:
 //       TILE-23004:         %[[Z0:.*]] = dim %{{.*}}, 0 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:         %[[Z1:.*]] = dim %{{.*}}, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:         %[[Z2:.*]] = dim %{{.*}}, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
-//       TILE-23004:         %[[szK:.*]] = affine.min #[[bound_map_4]](%[[C4]], %[[Z2]], %[[ivK]])
+//       TILE-23004:         %[[szK:.*]] = affine.min #[[bound_map_4]](%[[ivK]])[%[[Z2]]]
 //       TILE-23004:         %[[K:.*]] = dim %{{.*}}, 3 : memref<?x?x?x?xf32, #[[strided4D]]>
-//       TILE-23004:         %[[FilterView:.*]] = subview %{{.*}}[%[[C0]], %[[C0]], %[[ivK]], %[[C0]]] [%[[Z0]], %[[Z1]], %[[szK]], %[[K]]] [%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<?x?x?x?xf32, #[[strided4D]]> to memref<?x?x?x?xf32, #[[strided4D_dynamic]]>
+//       TILE-23004:         %[[FilterView:.*]] = subview %{{.*}}[0, 0, %[[ivK]], 0] [%[[Z0]], %[[Z1]], %[[szK]], %[[K]]] [1, 1, 1, 1] : memref<?x?x?x?xf32, #[[strided4D]]> to memref<?x?x?x?xf32, #[[strided4D]]>
 //
 //       TILE-23004:         %[[J1:.*]] = affine.apply #[[D0x30pS0x10]](%[[ivJ]])
-//       T__ILE-23004:         %[[I1pStep:.*]] = affine.apply #[[S0x10p90]]()[%[[I1]]]
+//       TILE-23004:         %[[PaddedInput0b:.*]] = dim %{{.*}}, 1 : memref<?x?x?x?xf32, #[[strided4D]]>
+//       TILE-23004:         %[[I1pStep:.*]] = affine.min #[[S0x10p90D0x30pS1]](%[[ivJ]])[%[[PaddedInput0]], %[[PaddedInput0b]]]
 //       TILE-23004:         %[[SZ2:.*]] = dim %{{.*}}, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:         %[[dim3:.*]] = dim %{{.*}}, 3
-//       TILE-23004:         %[[sz3:.*]] = affine.min #[[bound_map_4]](%[[C4]], %[[dim3]], %[[ivK]]
-//       TILE-23004:         %[[InputView:.*]] = subview %{{.*}}[%[[ivI]], %[[J1]], %[[C0]], %[[ivK]]] [%{{.*}}, %{{.*}}, %[[SZ2]], %[[sz3]]] [%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<?x?x?x?xf32, #[[strided4D]]> to memref<?x?x?x?xf32, #[[strided4D_dynamic]]>
+//       TILE-23004:         %[[sz3:.*]] = affine.min #[[bound_map_4]](%[[ivK]])[%[[dim3]]]
+//       TILE-23004:         %[[InputView:.*]] = subview %{{.*}}[%[[ivI]], %[[J1]], 0, %[[ivK]]] [%{{.*}}, %{{.*}}, %[[SZ2]], %[[sz3]]] [1, 1, 1, 1] : memref<?x?x?x?xf32, #[[strided4D]]> to memref<?x?x?x?xf32, #[[strided4D]]>
 //
 //       TILE-23004:         %[[X0:.*]] = dim %{{.*}}, 2 : memref<?x?x?x?xf32, #[[strided4D]]>
 //       TILE-23004:         %[[X1:.*]] = dim %{{.*}}, 3 : memref<?x?x?x?xf32, #[[strided4D]]>
-//       TILE-23004:         %[[OutputView:.*]] = subview %{{.*}}[%[[ivI]], %[[ivJ]], %[[C0]], %[[C0]]] [%{{.*}}, %{{.*}}, %[[X0]], %[[X1]]] [%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<?x?x?x?xf32, #[[strided4D]]> to memref<?x?x?x?xf32, #[[strided4D_dynamic]]>
+//       TILE-23004:         %[[OutputView:.*]] = subview %{{.*}}[%[[ivI]], %[[ivJ]], 0, 0] [%{{.*}}, %{{.*}}, %[[X0]], %[[X1]]] [1, 1, 1, 1] : memref<?x?x?x?xf32, #[[strided4D]]> to memref<?x?x?x?xf32, #[[strided4D]]>
 //
-//       TILE-23004:         linalg.conv(%[[FilterView]], %[[InputView]], %[[OutputView]]) {dilations = [10, 20], strides = [30, 40]} : memref<?x?x?x?xf32, #[[strided4D_dynamic]]>, memref<?x?x?x?xf32, #[[strided4D_dynamic]]>, memref<?x?x?x?xf32, #[[strided4D_dynamic]]>
+//       TILE-23004:         linalg.conv(%[[FilterView]], %[[InputView]], %[[OutputView]]) {dilations = [10, 20], strides = [30, 40]} : memref<?x?x?x?xf32, #[[strided4D]]>, memref<?x?x?x?xf32, #[[strided4D]]>, memref<?x?x?x?xf32, #[[strided4D]]>

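(Illustration, not part of the commit: with the tiling folder retired, the conv
input view bound above is expressed as a single two-result affine.min that
combines a dilation-scaled extent with the remaining input size, rather than a
pre-folded affine.apply. Hand-written sketch with invented names; only the map
shape is taken from the #[[S0x10p90D0x30pS1]] check above.)

  #input_bound = affine_map<(d0)[s0, s1] -> (s0 * 10 + 90, d0 * -30 + s1)>

  // %iv is the output-window IV; the symbols come from filter and input dims.
  func @conv_input_extent(%filter: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>,
                          %input: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>,
                          %iv: index) -> index {
    %z1 = dim %filter, 1 : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
    %in1 = dim %input, 1 : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
    %sz = affine.min #input_bound(%iv)[%z1, %in1]
    return %sz : index
  }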
diff --git a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
index 4ce27b379b19..98cecc3e81e2 100644
--- a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
+++ b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir
@@ -1,10 +1,9 @@
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
-// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 --dump-input-on-failure
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 --dump-input-on-failure
 
 // TILE-23004-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
 // TILE-20000-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
-// TILE-20000-DAG: #[[minmap:.*]] = affine_map<(d0, d1, d2) -> (2, d1 - d2)>
-// TILE-20000-DAG: #[[subviewstride:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3 * s4)>
+// TILE-20000-DAG: #[[minmap:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
 
 func @conv_padding(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) {
   linalg.conv(%arg0, %arg1, %arg2) {dilations = [10, 20], padding = dense<[[1, 1], [0, 1]]> : tensor<2x2xi64>, strides = [30, 40]} : memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>
@@ -21,20 +20,19 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>,
 //  TILE-20000-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?x?x?xf32, #[[strided4D]]>
 //  TILE-20000-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?x?x?xf32, #[[strided4D]]>)
 //   TILE-20000-DAG:   %[[C0:.*]] = constant 0 : index
-//   TILE-20000-DAG:   %[[C1:.*]] = constant 1 : index
 //   TILE-20000-DAG:   %[[C2:.*]] = constant 2 : index
 //       TILE-20000:   %[[B:.*]] = dim %[[ARG1]], 0
 //       TILE-20000:   scf.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] {
 //       TILE-20000:     %[[DIM10:.*]] = dim %[[ARG1]], 0
-//       TILE-20000:     %[[EXTENT:.*]] = affine.min #[[minmap]](%[[C2]], %[[DIM10]], %[[ivI]])
+//       TILE-20000:     %[[EXTENT:.*]] = affine.min #[[minmap]](%[[ivI]])[%[[DIM10]]]
 //       TILE-20000:     %[[DIM11:.*]] = dim %[[ARG1]], 1
 //       TILE-20000:     %[[DIM12:.*]] = dim %[[ARG1]], 2
 //       TILE-20000:     %[[DIM13:.*]] = dim %[[ARG1]], 3
-//       TILE-20000:     %[[SUBVIEW1:.*]] = subview %[[ARG1]][%[[ivI]], %[[C0]], %[[C0]], %[[C0]]] [%[[EXTENT]], %[[DIM11]], %[[DIM12]], %[[DIM13]]]
+//       TILE-20000:     %[[SUBVIEW1:.*]] = subview %[[ARG1]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM11]], %[[DIM12]], %[[DIM13]]]
 //       TILE-20000:     %[[DIM20:.*]] = dim %[[ARG2]], 0
-//       TILE-20000:     %[[EXTENT:.*]] = affine.min #[[minmap]](%[[C2]], %[[DIM20]], %[[ivI]])
+//       TILE-20000:     %[[EXTENT:.*]] = affine.min #[[minmap]](%[[ivI]])[%[[DIM20]]]
 //       TILE-20000:     %[[DIM21:.*]] = dim %[[ARG2]], 1
 //       TILE-20000:     %[[DIM22:.*]] = dim %[[ARG2]], 2
 //       TILE-20000:     %[[DIM23:.*]] = dim %[[ARG2]], 3
-//       TILE-20000:     %[[SUBVIEW2:.*]] = subview %[[ARG2]][%[[ivI]], %[[C0]], %[[C0]], %[[C0]]] [%[[EXTENT]], %[[DIM21]], %[[DIM22]], %[[DIM23]]]
+//       TILE-20000:     %[[SUBVIEW2:.*]] = subview %[[ARG2]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM21]], %[[DIM22]], %[[DIM23]]]
 //       TILE-20000:     linalg.conv(%[[ARG0]], %[[SUBVIEW1]], %[[SUBVIEW2]])

diff --git a/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir b/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
index 93e1189cad35..1f7b8e526e93 100644
--- a/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
+++ b/mlir/test/Dialect/Linalg/tile_indexed_generic.mlir
@@ -65,8 +65,8 @@ func @indexed_generic_matrix(%operand: memref<50x100xf32>, %result: memref<50x10
   return
 }
 // TILE-10n25-LABEL: func @indexed_generic_matrix
-// TILE-10n25: %[[C25:.*]] = constant 25 : index
-// TILE-10n25: %[[C10:.*]] = constant 10 : index
+// TILE-10n25-DAG: %[[C25:.*]] = constant 25 : index
+// TILE-10n25-DAG: %[[C10:.*]] = constant 10 : index
 // TILE-10n25: scf.for %[[K:.*]] = {{.*}} step %[[C10]]
 // TILE-10n25:   scf.for %[[L:.*]] = {{.*}} step %[[C25]]
 // TILE-10n25:     linalg.indexed_generic

diff --git a/mlir/test/Dialect/Linalg/tile_parallel.mlir b/mlir/test/Dialect/Linalg/tile_parallel.mlir
index ef21ef0301d2..963051b7c7b3 100644
--- a/mlir/test/Dialect/Linalg/tile_parallel.mlir
+++ b/mlir/test/Dialect/Linalg/tile_parallel.mlir
@@ -26,7 +26,6 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-2-LABEL: func @sum(
 // TILE-2-SAME:    [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) {
 // TILE-2-DAG: [[C0:%.*]] = constant 0 : index
-// TILE-2-DAG: [[C1:%.*]] = constant 1 : index
 // TILE-2-DAG: [[C2:%.*]] = constant 2 : index
 // TILE-2: [[LHS_ROWS:%.*]] = dim [[LHS]], 0
 // TILE-2: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_ROWS]]) step ([[C2]]) {
@@ -39,7 +38,6 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-02-LABEL: func @sum(
 // TILE-02-SAME:    [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) {
 // TILE-02-DAG: [[C0:%.*]] = constant 0 : index
-// TILE-02-DAG: [[C1:%.*]] = constant 1 : index
 // TILE-02-DAG: [[C2:%.*]] = constant 2 : index
 // TILE-02: [[LHS_COLS:%.*]] = dim [[LHS]], 1
 // TILE-02: scf.parallel ([[I:%.*]]) = ([[C0]]) to ([[LHS_COLS]]) step ([[C2]]) {
@@ -57,7 +55,6 @@ func @sum(%lhs: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-234-LABEL: func @sum(
 // TILE-234-SAME:    [[LHS:%.*]]: {{.*}}, [[RHS:%.*]]: {{.*}}, [[SUM:%.*]]: {{.*}}) {
 // TILE-234-DAG: [[C0:%.*]] = constant 0 : index
-// TILE-234-DAG: [[C1:%.*]] = constant 1 : index
 // TILE-234-DAG: [[C2:%.*]] = constant 2 : index
 // TILE-234-DAG: [[C3:%.*]] = constant 3 : index
 // TILE-234: [[LHS_ROWS:%.*]] = dim [[LHS]], 0

diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
index 70ba828e4953..e38153058419 100644
--- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
@@ -170,8 +170,9 @@ getMatmulToVectorCanonicalizationPatterns(MLIRContext *context) {
 }
 
 static void fillL1TilingAndMatmulToVectorPatterns(
-    MLIRContext *context, StringRef startMarker,
+    FuncOp funcOp, StringRef startMarker,
     SmallVectorImpl<OwningRewritePatternList> &patternsVector) {
+  MLIRContext *context = funcOp.getContext();
   patternsVector.emplace_back(LinalgTilingPattern<MatmulOp>(
       context,
       LinalgTilingOptions().setTileSizes({8, 12, 16}).setInterchange({1, 0, 2}),
@@ -195,7 +196,7 @@ void TestLinalgTransforms::runOnFunction() {
   } else {
     SmallVector<OwningRewritePatternList, 4> stage1Patterns;
     if (testMatmulToVectorPatterns1dTiling) {
-      fillL1TilingAndMatmulToVectorPatterns(&getContext(), "START",
+      fillL1TilingAndMatmulToVectorPatterns(getFunction(), "START",
                                             stage1Patterns);
     } else if (testMatmulToVectorPatterns2dTiling) {
       stage1Patterns.emplace_back(
@@ -204,7 +205,7 @@ void TestLinalgTransforms::runOnFunction() {
                                             .setTileSizes({768, 264, 768})
                                             .setInterchange({1, 2, 0}),
                                         LinalgMarker({"START"}, "L2")));
-      fillL1TilingAndMatmulToVectorPatterns(&getContext(), "L2",
+      fillL1TilingAndMatmulToVectorPatterns(getFunction(), "L2",
                                             stage1Patterns);
     }
     OwningRewritePatternList stage2Patterns =
