[Mlir-commits] [mlir] c95a724 - [mlir][linalg] Tiling: Use loop ub in extract_slice size computation if possible
Matthias Springer
llvmlistbot at llvm.org
Wed Sep 8 19:11:08 PDT 2021
Author: Matthias Springer
Date: 2021-09-09T11:06:22+09:00
New Revision: c95a7246a38afb0a7c1f371d1591a96617aa4d73
URL: https://github.com/llvm/llvm-project/commit/c95a7246a38afb0a7c1f371d1591a96617aa4d73
DIFF: https://github.com/llvm/llvm-project/commit/c95a7246a38afb0a7c1f371d1591a96617aa4d73.diff
LOG: [mlir][linalg] Tiling: Use loop ub in extract_slice size computation if possible
When tiling a LinalgOp, extract_slice/insert_slice pairs are inserted. To avoid going out-of-bounds when the tile size does not divide the shape size evenly (at the boundary), AffineMin ops are inserted. Some ops have assumptions regarding the dimensions of inputs/outputs. E.g., in an `A * B` matmul, `dim(A, 1) == dim(B, 0)`. However, loop bounds use either `dim(A, 1)` or `dim(B, 0)`, so the AffineMin ops of the other operand refer to a different (but equal at runtime) value.
With this change, AffineMin ops are expressed in terms of loop bounds instead of tensor sizes. (Both have the same runtime value.) This simplifies canonicalizations.
Differential Revision: https://reviews.llvm.org/D109267
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/lib/Dialect/Linalg/Utils/Utils.cpp
mlir/test/Dialect/Linalg/fusion-pattern.mlir
mlir/test/Dialect/Linalg/fusion-sequence.mlir
mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
mlir/test/Dialect/Linalg/fusion.mlir
mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
mlir/test/Dialect/Linalg/tile-conv-padding.mlir
mlir/test/Dialect/Linalg/tile-conv.mlir
mlir/test/Dialect/Linalg/tile-simple-conv.mlir
mlir/test/Dialect/Linalg/tile.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 81ab7eaa08866..c451d0d677d57 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -91,7 +91,7 @@ SmallVector<Value> computeTileSizes(OpBuilder &b, Location loc, ValueRange ivs,
/// at offsets `lbs` and with sizes `subShapeSizes`.
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
ValueRange tileSizes, AffineMap map, ValueRange lbs,
- ValueRange subShapeSizes);
+ ValueRange ubs, ValueRange subShapeSizes);
/// Creates extract_slice/subview ops for all `valuesToTile` of the given
/// `linalgOp` with `builder`, assuming `linalgOp` is being fused into a loop
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
index 3e50f352a38d8..71556903245bd 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp
@@ -177,19 +177,18 @@ static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
auto one = b.create<ConstantIndexOp>(loc, 1);
for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
+ auto shapeDim = getShapeDefiningLoopRange(producer, i);
+ Value dim = createOrFoldDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
+ sizeBounds.push_back(dim);
auto it = fusedLoopsAndRanges.find(i);
if (it != fusedLoopsAndRanges.end()) {
ivs.push_back(it->second.offset);
tileSizes.push_back(it->second.size);
- sizeBounds.push_back(nullptr);
loopRanges.push_back(it->second);
LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange "
<< loopRanges.back() << "\n");
} else {
- auto shapeDim = getShapeDefiningLoopRange(producer, i);
- Value dim = createOrFoldDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
tileSizes.push_back(zero);
- sizeBounds.push_back(dim);
loopRanges.push_back(Range{zero, dim, one});
LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange "
<< loopRanges.back() << "\n");
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index acef26a281437..c685f36d712e9 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -370,8 +370,9 @@ static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op,
assert(static_cast<int64_t>(tileSizes.size()) == rank);
// Compute lower and upper bounds of the loop nest.
SmallVector<Range> ranges = op.getLoopBounds(builder);
- SmallVector<Value> lbs, dims, steps;
+ SmallVector<Value> lbs, dims, allDims, steps;
for (int64_t i = 0; i < rank; ++i) {
+ allDims.push_back(ranges[i].size);
if (!isZero(tileSizes[i])) {
lbs.push_back(ranges[i].offset);
dims.push_back(ranges[i].size);
@@ -388,13 +389,14 @@ static LogicalResult tilePadTensorOp(OpBuilder &builder, PadTensorOp op,
SmallVector<Value> offsets =
computeTileOffsets(b, loc, localIvs, tileSizes);
SmallVector<Value> sizes =
- computeTileSizes(b, loc, localIvs, tileSizes, dims);
+ computeTileSizes(b, loc, localIvs, tileSizes, allDims);
// Create ExtractSliceOp: Extract a tile from the PadTensorOp.
// Note: The PadTensorOp is located outside of the loop nest. It is
// later moved inside by ExtractSliceOfPadTensorSwapPattern.
auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext());
- Value tiledOutput = makeTiledShape(b, loc, newPadOp->getResult(0),
- tileSizes, map, offsets, sizes);
+ Value tiledOutput =
+ makeTiledShape(b, loc, newPadOp->getResult(0), tileSizes, map,
+ offsets, allDims, sizes);
auto sliceOp = tiledOutput.getDefiningOp<tensor::ExtractSliceOp>();
assert(sliceOp && "expected ExtractSliceOp");
// Insert the tile into the output tensor.
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 596ae49232c6d..9237c003297be 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -514,7 +514,7 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
ValueRange tileSizes, AffineMap map, ValueRange lbs,
- ValueRange subShapeSizes) {
+ ValueRange ubs, ValueRange subShapeSizes) {
auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
assert(shapedType && "only shaped types can be tiled");
ArrayRef<int64_t> shape = shapedType.getShape();
@@ -567,7 +567,7 @@ Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
AffineMap::inferFromExprList(
ArrayRef<ArrayRef<AffineExpr>>{{dim0, dim1 - dim2}})
.front();
- Value d = createOrFoldDimOp(builder, loc, valueToTile, r);
+ Value d = applyMapToValues(builder, loc, m, ubs).front();
SmallVector<Value, 4> operands{size, d, offset};
fullyComposeAffineMapAndOperands(&minMap, &operands);
size = builder.create<AffineMinOp>(loc, builder.getIndexType(), minMap,
@@ -656,8 +656,8 @@ SmallVector<Value, 4> makeTiledShapes(OpBuilder &b, Location loc,
}
LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
- tiledShapes.push_back(
- makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs, subShapeSizes));
+ tiledShapes.push_back(makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs,
+ sizeBounds, subShapeSizes));
}
return tiledShapes;
diff --git a/mlir/test/Dialect/Linalg/fusion-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-pattern.mlir
index 9b9b24461ec7a..84f228b712d1d 100644
--- a/mlir/test/Dialect/Linalg/fusion-pattern.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-pattern.mlir
@@ -43,12 +43,10 @@ module {
// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[N]]]
// CHECK: %[[SV2:.+]] = memref.subview %[[ARG1]][0, %[[IV1]]]
// CHECK-SAME: %[[K_2]], %[[TILE_N]]
+// CHECK: %[[SV3:.+]] = memref.subview %[[ARG2]][%[[IV0]], %[[IV1]]]
+// CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]]
// CHECK: %[[M_2:.+]] = memref.dim %[[ARG2]], %[[C0]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]]
// CHECK: %[[N_2:.+]] = memref.dim %[[ARG2]], %[[C1]]
-// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[N_2]]]
-// CHECK: %[[SV3:.+]] = memref.subview %[[ARG2]][%[[IV0]], %[[IV1]]]
-// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M_2]], %[[M]]]
// CHECK: %[[TILE_N_3:.+]] = affine.min #[[MAP5]](%[[IV1]])[%[[N_2]], %[[N]]]
// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG2]][%[[IV0]], %[[IV1]]]
@@ -59,9 +57,8 @@ module {
// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]]
// CHECK: %[[SV4:.+]] = memref.subview %[[SV1]][0, %[[IV2]]]
// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]]
-// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]]
// CHECK: %[[SV5:.+]] = memref.subview %[[SV2]][%[[IV2]], 0]
-// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]]
+// CHECK-SAME: [%[[TILE_K]], %[[TILE_N]]]
// CHECK: linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_basic_fusion"
// CHECK-SAME: ins(%[[SV4]], %[[SV5]]
@@ -112,18 +109,15 @@ module {
// CHECK: %[[SV1:.+]] = memref.subview %[[ARG2]][0, %[[IV0]]]
// CHECK-SAME: [%[[K]], %[[TILE_N]]]
// CHECK: %[[M:.+]] = memref.dim %[[ARG3]], %[[C0]]
-// CHECK: %[[N_2:.+]] = memref.dim %[[ARG3]], %[[C1]]
-// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[N_2]]]
// CHECK: %[[SV2:.+]] = memref.subview %[[ARG3]][0, %[[IV0]]]
-// CHECK-SAME: [%[[M]], %[[TILE_N_2]]]
-// CHECK: %[[K_2:.+]] = memref.dim %[[ARG1]], %[[C0]]
+// CHECK-SAME: [%[[M]], %[[TILE_N]]]
// CHECK: %[[N_3:.+]] = memref.dim %[[ARG1]], %[[C1]]
+// CHECK: %[[K_2:.+]] = memref.dim %[[ARG1]], %[[C0]]
// CHECK: %[[TILE_N_3:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[N_3]], %[[N]]]
// CHECK: %[[SV3:.+]] = memref.subview %[[ARG1]][0, %[[IV0]]]
// CHECK-SAME: [%[[K_2]], %[[TILE_N_3]]]
-// CHECK: %[[TILE_N_4:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[N]], %[[N]]]
// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG2]][0, %[[IV0]]]
-// CHECK-SAME: [%[[K]], %[[TILE_N_4]]]
+// CHECK-SAME: [%[[K]], %[[TILE_N_3]]]
// CHECK: linalg.copy(%[[SV3]], %[[SV3_2]])
// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion_producer"
// CHECK-NOT: linalg.fill
@@ -136,12 +130,10 @@ module {
// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K_2]]]
// CHECK: %[[SV4:.+]] = memref.subview %[[ARG0]][%[[IV1]], %[[IV2]]]
// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]]
-// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP3]](%[[IV2]])[%[[K]]]
// CHECK: %[[SV5:.+]] = memref.subview %[[SV1]][%[[IV2]], 0]
-// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP2]](%[[IV1]])[%[[M]]]
+// CHECK-SAME: [%[[TILE_K]], %[[TILE_N]]]
// CHECK: %[[SV6:.+]] = memref.subview %[[SV2]][%[[IV1]], 0]
-// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]]
+// CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]]
// CHECK: linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_rhs_fusion"
// CHECK-SAME: ins(%[[SV4]], %[[SV5]]
@@ -195,11 +187,10 @@ module {
// CHECK: %[[K:.+]] = memref.dim %[[ARG1]], %[[C1]]
// CHECK: %[[SV1:.+]] = memref.subview %[[ARG1]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M]], %[[K]]]
-// CHECK: %[[M_2:.+]] = memref.dim %[[ARG3]], %[[C0]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]]
// CHECK: %[[N:.+]] = memref.dim %[[ARG3]], %[[C1]]
// CHECK: %[[SV2:.+]] = memref.subview %[[ARG3]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_2]], %[[N]]]
+// CHECK-SAME: [%[[TILE_M]], %[[N]]]
+// CHECK: %[[M_2:.+]] = memref.dim %[[ARG3]], %[[C0]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M_2]], %[[M]]]
// CHECK: %[[SV2_2:.+]] = memref.subview %[[ARG3]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N]]]
@@ -208,9 +199,8 @@ module {
// CHECK: %[[K_3:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[SV3:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_4]], %[[K_3]]]
-// CHECK: %[[TILE_M_5:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[SV3_2:.+]] = memref.subview %[[ARG1]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_5]], %[[K]]]
+// CHECK-SAME: [%[[TILE_M_4]], %[[K]]]
// CHECK: linalg.copy(%[[SV3]], %[[SV3_2]])
// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion_producer"
// CHECK: linalg.fill(%[[CST]], %[[SV2_2]])
@@ -222,14 +212,11 @@ module {
// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K]]]
// CHECK: %[[SV4:.+]] = memref.subview %[[SV1]][0, %[[IV2]]]
// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]]
-// CHECK: %[[K_2:.+]] = memref.dim %[[ARG2]], %[[C0]]
-// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K_2]]]
// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]]
// CHECK: %[[SV5:.+]] = memref.subview %[[ARG2]][%[[IV2]], %[[IV1]]]
-// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]]
-// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N]]]
+// CHECK-SAME: [%[[TILE_K]], %[[TILE_N]]]
// CHECK: %[[SV6:.+]] = memref.subview %[[SV2]][0, %[[IV1]]]
-// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]]
+// CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]]
// CHECK: linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_two_operand_fusion"
// CHECK-SAME: ins(%[[SV4]], %[[SV5]]
@@ -280,19 +267,16 @@ module {
// CHECK: %[[K2:.+]] = memref.dim %[[ARG2]], %[[C1]]
// CHECK: %[[SV1:.+]] = memref.subview %[[ARG2]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M]], %[[K2]]]
-// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]]
// CHECK: %[[N:.+]] = memref.dim %[[ARG4]], %[[C1]]
// CHECK: %[[SV2:.+]] = memref.subview %[[ARG4]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_2]], %[[N]]]
+// CHECK-SAME: [%[[TILE_M]], %[[N]]]
// CHECK: %[[M_3:.+]] = memref.dim %[[ARG0]], %[[C0]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M_3]], %[[M]]]
// CHECK: %[[K1:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[SV3:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[K1]]]
-// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP4]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[SV1_2:.+]] = memref.subview %[[ARG2]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_4]], %[[K2]]]
+// CHECK-SAME: [%[[TILE_M_3]], %[[K2]]]
// CHECK: linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer"
// CHECK-SAME: ins(%[[SV3]], %[[ARG1]]
@@ -305,14 +289,11 @@ module {
// CHECK: %[[TILE_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K2]]]
// CHECK: %[[SV6:.+]] = memref.subview %[[SV1]][0, %[[IV2]]]
// CHECK-SAME: [%[[TILE_M]], %[[TILE_K]]]
-// CHECK: %[[K_2:.+]] = memref.dim %[[ARG3]], %[[C0]]
-// CHECK: %[[TILE_K_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[K_2]]]
// CHECK: %[[TILE_N:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N_2]]]
// CHECK: %[[SV7:.+]] = memref.subview %[[ARG3]][%[[IV2]], %[[IV1]]]
-// CHECK-SAME: [%[[TILE_K_2]], %[[TILE_N]]]
-// CHECK: %[[TILE_N_2:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N]]]
+// CHECK-SAME: [%[[TILE_K]], %[[TILE_N]]]
// CHECK: %[[SV8:.+]] = memref.subview %[[SV2]][0, %[[IV1]]]
-// CHECK-SAME: [%[[TILE_M_2]], %[[TILE_N_2]]]
+// CHECK-SAME: [%[[TILE_M]], %[[TILE_N]]]
// CHECK: linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion"
// CHECK-SAME: ins(%[[SV6]], %[[SV7]]
diff --git a/mlir/test/Dialect/Linalg/fusion-sequence.mlir b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
index d397906591913..a18c007fe355a 100644
--- a/mlir/test/Dialect/Linalg/fusion-sequence.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-sequence.mlir
@@ -42,7 +42,8 @@ module {
// CHECK-DAG: %[[SV_ARG0:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0]
// CHECK-DAG: %[[SV_ARG1:.+]] = memref.subview %[[ARG1]][0, %[[IV1]]]
// CHECK: %[[SV_TEMP_2:.+]] = memref.subview %[[TEMP]][%[[IV0]], %[[IV1]]]
-// CHECK: linalg.fill(%{{.+}}, %[[SV_TEMP_2]])
+// CHECK: %[[SV_TEMP_3:.+]] = memref.subview %[[TEMP]][%[[IV0]], %[[IV1]]]
+// CHECK: linalg.fill(%{{.+}}, %[[SV_TEMP_3]])
// CHECK: linalg.matmul
// CHECK-SAME: ins(%[[SV_ARG0]], %[[SV_ARG1]]
// CHECK-SAME: : memref<?x?xf32, #[[MAP2]]>, memref<?x?xf32, #[[MAP2]]>)
@@ -107,11 +108,10 @@ module {
// CHECK: %[[TILE_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]]
// CHECK: %[[SV_ALLOC3:.+]] = memref.subview %[[ALLOC2]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M]], %[[N2]]]
-// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M_2]]]
// CHECK: %[[N3:.+]] = memref.dim %[[ARG4]], %[[C1]]
// CHECK: %[[SV_ARG4:.+]] = memref.subview %[[ARG4]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_2]], %[[N3]]]
+// CHECK-SAME: [%[[TILE_M]], %[[N3]]]
+// CHECK: %[[M_2:.+]] = memref.dim %[[ARG4]], %[[C0]]
// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP2]](%[[IV0]])[%[[M_2]], %[[M]]]
// CHECK: %[[SV_ARG4_2:.+]] = memref.subview %[[ARG4]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N3]]]
@@ -124,10 +124,12 @@ module {
// CHECK: %[[N0:.+]] = memref.dim %[[ARG0]], %[[C1]]
// CHECK: %[[SV_ARG0:.+]] = memref.subview %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_5]], %[[N0]]]
+// CHECK: %[[SV_ALLOC4:.+]] = memref.subview %[[ALLOC1]][%[[IV0]], 0]
+// CHECK-SAME: [%[[TILE_M_5]], %[[N1]]]
// CHECK: linalg.fill(%{{.+}}, %[[SV_ALLOC1]])
// CHECK: linalg.matmul ins(%[[SV_ARG0]], %[[ARG1]]
// CHECK-SAME: : memref<?x?xf32, #[[MAP1]]>, memref<?x?xf32>)
-// CHECK-SAME: outs(%[[SV_ALLOC1]] : memref<?x?xf32, #[[MAP1]]>)
+// CHECK-SAME: outs(%[[SV_ALLOC4]] : memref<?x?xf32, #[[MAP1]]>)
// CHECK: linalg.fill(%{{.+}}, %[[SV_ALLOC2]])
// CHECK: linalg.matmul ins(%[[SV_ALLOC1]], %[[ARG2]]
// CHECK-SAME: : memref<?x?xf32, #[[MAP1]]>, memref<?x?xf32>)
@@ -210,7 +212,8 @@ module {
}
}
-// CHECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (16, d0 - d1)>
+// CHaECK: #[[MAP0:.+]] = affine_map<(d0, d1) -> (16, d0 - d1)>
+// CHECK: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
// CHECK: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
// CHECK: func @tensor_matmul_fusion(
@@ -226,25 +229,20 @@ module {
// CHECK: %[[M:.+]] = tensor.dim %[[ARG0]], %c0 : tensor<?x?xf32>
// CHECK: %[[R0:.+]] = scf.for %[[IV0:[a-zA-Z0-9_]+]] =
// CHECK-SAME: iter_args(%[[ARG8:.+]] = %[[ARG6]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[M_1:.+]] = tensor.dim %[[ARG8]], %[[C0]]
-// CHECK: %[[TILE_M_1:.+]] = affine.min #[[MAP0]](%[[M_1]], %[[IV0]])
+// CHECK: %[[TILE_M_1:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[M]]]
// CHECK: %[[N3:.+]] = tensor.dim %[[ARG8]], %[[C1]]
// CHECK: %[[STARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_1]], %[[N3]]]
-// CHECK: %[[M_2:.+]] = tensor.dim %[[ARG4]], %[[C0]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_2]], %[[M]]]
+// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[N2:.+]] = tensor.dim %[[ARG4]], %[[C1]]
// CHECK: %[[STARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_2]], %[[N2]]]
-// CHECK: %[[TILE_M_3:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]], %[[M]]]
// CHECK: %[[N0:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: %[[STARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_3]], %[[N0]]]
-// CHECK: %[[M_3:.+]] = tensor.dim %[[ARG2]], %[[C0]]
-// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M_3]], %[[M]]]
+// CHECK-SAME: [%[[TILE_M_2]], %[[N0]]]
// CHECK: %[[N1:.+]] = tensor.dim %[[ARG2]], %[[C1]]
// CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_4]], %[[N1]]]
+// CHECK-SAME: [%[[TILE_M_2]], %[[N1]]]
// CHECK: %[[T0:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[STARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>
// CHECK-SAME: ) outs(%[[STARG2]] : tensor<?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
index 01dab480d89c6..c76cba0652a7f 100644
--- a/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-tensor-pattern.mlir
@@ -13,10 +13,9 @@ module {
return %ABC : tensor<?x?xf32>
}
}
-// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1) -> (32, d0 - d1)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)>
// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (64, -d0 + s0)>
-// CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1) -> (64, d0 - d1)>
// CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 32, -d0 + s1)>
// CHECK: func @matmul_fusion
@@ -35,8 +34,7 @@ module {
// CHECK: %[[RESULT:.+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] =
// CHECK-SAME: %[[C0]] to %[[M]] step %[[C32]]
// CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[M_2:.+]] = tensor.dim %[[ARG6]], %[[C0]]
-// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[M_2]], %[[IV0]])
+// CHECK: %[[TILE_M_2:.+]] = affine.min #[[MAP1]](%[[IV0]])[%[[M]]]
// CHECK: %[[N3:.+]] = tensor.dim %[[ARG6]], %[[C1]]
// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_2]], %[[N3]]]
@@ -44,11 +42,9 @@ module {
// CHECK: %[[N1:.+]] = tensor.dim %[[ARG0]], %[[C1]]
// CHECK: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
// CHECK-SAME: [%[[TILE_M_3]], %[[N1]]]
-// CHECK: %[[M_3:.+]] = tensor.dim %[[ARG2]], %[[C0]]
-// CHECK: %[[TILE_M_4:.+]] = affine.min #[[MAP5]](%[[IV0]])[%[[M_3]], %[[M]]]
// CHECK: %[[N2_2:.+]] = tensor.dim %[[ARG2]], %[[C1]]
// CHECK: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], 0]
-// CHECK-SAME: [%[[TILE_M_4]], %[[N2_2]]]
+// CHECK-SAME: [%[[TILE_M_3]], %[[N2_2]]]
// CHECK: %[[LHS:.+]] = linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion_producer"
// CHECK-SAME: ins(%[[ST_ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
@@ -64,23 +60,19 @@ module {
// CHECK: %[[TILE_N2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2]]]
// CHECK: %[[ST_LHS:.+]] = tensor.extract_slice %[[LHS]][0, %[[IV2]]]
// CHECK-SAME: [%[[TILE_M_3]], %[[TILE_N2]]]
-// CHECK: %[[N2_3:.+]] = tensor.dim %[[ARG3]], %[[C0]]
-// CHECK: %[[TILE_N2_2:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[N2_3]]]
// CHECK: %[[TILE_N3:.+]] = affine.min #[[MAP3]](%[[IV1]])[%[[N3_2]]]
// CHECK: %[[ST_ARG3:.+]] = tensor.extract_slice %[[ARG3]][%[[IV2]], %[[IV1]]]
-// CHECK-SAME: [%[[TILE_N2_2]], %[[TILE_N3]]]
+// CHECK-SAME: [%[[TILE_N2]], %[[TILE_N3]]]
// CHECK: %[[M_4:.+]] = tensor.dim %[[ARG10]], %[[C0]]
-// CHECK: %[[N3_3:.+]] = tensor.dim %[[ARG10]], %[[C1]]
-// CHECK: %[[TILE_N3_2:.+]] = affine.min #[[MAP4]](%[[N3_3]], %[[IV1]])
// CHECK: %[[ST_ARG4:.+]] = tensor.extract_slice %[[ARG10]][0, %[[IV1]]]
-// CHECK-SAME: [%[[M_4]], %[[TILE_N3_2]]]
+// CHECK-SAME: [%[[M_4]], %[[TILE_N3]]]
// CHECK: %[[ST_RESULT:.+]] = linalg.matmul
// CHECK-SAME: __internal_linalg_transform__ = "after_lhs_fusion"
// CHECK-SAME: ins(%[[ST_LHS]], %[[ST_ARG3]]
// CHECK-SAME: : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ST_ARG4]] : tensor<?x?xf32>)
// CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[ST_RESULT]]
-// CHECK-SAME: into %[[ARG10]][0, %[[IV1]]] [%[[M_4]], %[[TILE_N3_2]]]
+// CHECK-SAME: into %[[ARG10]][0, %[[IV1]]] [%[[M_4]], %[[TILE_N3]]]
// CHECK: scf.yield %[[UPDATE1]]
// CHECK: }
// CHECK: scf.yield %[[YIELD1]]
diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir
index 9c83ebf3bb2ae..a69db827889d3 100644
--- a/mlir/test/Dialect/Linalg/fusion.mlir
+++ b/mlir/test/Dialect/Linalg/fusion.mlir
@@ -257,9 +257,10 @@ func @f5(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
// CHECK-DAG: #[[BOUND_2_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 2, -d0 + s1)>
// CHECK-DAG: #[[BOUND_4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
// CHECK: func @f5
-// HECK-SAME: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}})
+// CHECK-SAME: (%[[A:.*]]:{{.*}}, %[[B:.*]]:{{.*}}, %[[C:.*]]:{{.*}}, %[[D:.*]]:{{.*}}, %[[E:.*]]:{{.*}})
// CHECK-DAG: %[[C0:.*]] = constant 0 : index
// CHECK-DAG: %[[C1:.*]] = constant 1 : index
+// CHECK-DAG: %[[A_0:.*]] = memref.dim %[[A]], %[[C0]] : memref<?x?xf32, #[[$strided2D]]>
// CHECK-DAG: %[[B_1:.*]] = memref.dim %[[B]], %[[C1]] : memref<?x?xf32, #[[$strided2D]]>
// CHECK-DAG: %[[C_0:.*]] = memref.dim %[[C]], %[[C0]] : memref<?x?xf32, #[[$strided2D]]>
// CHECK-DAG: %[[D_0:.*]] = memref.dim %[[D]], %[[C0]] : memref<?x?xf32, #[[$strided2D]]>
@@ -268,18 +269,17 @@ func @f5(%A: memref<?x?xf32, offset: 0, strides: [?, ?]>,
// CHECK: scf.for %[[I:.*]] = %{{.*}} to %[[D_0]] step %{{.*}} {
// CHECK: %[[BOUND_2_C0:.+]] = affine.min #[[BOUND_2_MAP]](%[[I]])[%[[C_0]]]
// CHECK: %[[C_I0:.*]] = memref.subview %[[C]][%[[I]], 0] [%[[BOUND_2_C0]]
-// CHECK: %[[BOUND_2_D0:.+]] = affine.min #[[BOUND_2_MAP]](%[[I]])[%[[D_0]]]
+// CHECK: %[[BOUND_ID_C0:.+]] = affine.min #[[BOUND_2_MAP_2]](%[[I]])[%[[A_0]], %[[C_0]]]
// CHECK: %[[A_I0:.*]] = memref.subview %[[A]][%[[I]], 0]
-// CHECK: %[[BOUND_ID_C0:.+]] = affine.min #[[BOUND_2_MAP_2]](%[[I]])[%[[C_0]], %[[C_0]]]
// CHECK: %[[C_I0_OUT:.*]] = memref.subview %[[C]][%[[I]], 0] [%[[BOUND_ID_C0]]
// CHECK: scf.for %[[J:.*]] = %{{.*}} to %[[B_1]] step %{{.*}} {
// CHECK: %[[E_IJ:.*]] = memref.subview %[[E]][%[[I]], %[[J]]]
// CHECK: scf.for %[[K:.*]] = %{{.*}} to %[[D_1]] step %{{.*}} {
// CHECK: %[[D_IK:.*]] = memref.subview %[[D]][%[[I]], %[[K]]] [2, 4]
// CHECK: %[[B_KJ:.*]] = memref.subview %[[B]][%[[K]], %[[J]]]
+// CHECK: %[[BOUND_4_B1:.*]] = affine.min #[[BOUND_4_MAP]](%[[K]])[%[[B_1]]]
// CHECK: %[[B_0K:.*]] = memref.subview %[[B]][0, %[[K]]]
-// CHECK: %[[BOUND_4_D1:.+]] = affine.min #[[BOUND_4_MAP]](%[[K]])[%[[D_1]]]
-// CHECK: %[[D_IK_OUT:.+]] = memref.subview %[[D]][%[[I]], %[[K]]] [%[[BOUND_2_D0]], %[[BOUND_4_D1]]]
+// CHECK: %[[D_IK_OUT:.+]] = memref.subview %[[D]][%[[I]], %[[K]]] [%[[BOUND_2_C0]], %[[BOUND_4_B1]]]
// CHECK: linalg.matmul ins(%[[A_I0]], %[[B_00]]{{.*}} outs(%[[C_I0_OUT]]
// CHECK: linalg.matmul ins(%[[C_I0]], %[[B_0K]]{{.*}} outs(%[[D_IK_OUT]]
// CHECK: linalg.matmul ins(%[[D_IK]], %[[B_KJ]]{{.*}} outs(%[[E_IJ]]
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
index 2bab8758ee0b6..c1a761ce1c425 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -44,12 +44,9 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
// CHECK-DAG: %[[dA1:.*]] = tensor.dim %[[A]], %[[C1]] : tensor<?x?xf32>
// CHECK-DAG: %[[dB0:.*]] = tensor.dim %[[B]], %[[C0]] : tensor<?x?xf32>
// CHECK-DAG: %[[dB1:.*]] = tensor.dim %[[B]], %[[C1]] : tensor<?x?xf32>
-// CHECK-DAG: %[[dC0:.*]] = tensor.dim %[[C]], %[[C0]] : tensor<?x?xf32>
-// CHECK-DAG: %[[dC1:.*]] = tensor.dim %[[C]], %[[C1]] : tensor<?x?xf32>
// CHECK: scf.for %[[I:[0-9a-z]*]]
// CHECK: %[[sizeA0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dA0]]]
// CHECK: %[[stA:.*]] = tensor.extract_slice %[[A]][%[[I]], 0] [%[[sizeA0]], %[[dA1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[sizeC0:.*]] = affine.min #[[BOUND2_MAP]](%[[I]])[%[[dC0]]]
// CHECK-NEXT: scf.for %[[J:[0-9a-z]*]]
// CHECK-NEXT: scf.for %[[K:[0-9a-z]*]] {{.*}} iter_args(%[[RES:[0-9a-z]*]]
// CHECK-DAG: %[[stB1:.*]] = tensor.extract_slice %[[B]][%[[K]], %[[J]]] [4, 3] [1, 1] : tensor<?x?xf32> to tensor<4x3xf32>
@@ -58,8 +55,7 @@ func @matmul_tensors(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tens
// slices of the producing matmul.
// CHECK: %[[sizeB1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dB1]]]
// CHECK: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], %[[sizeB1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
-// CHECK: %[[sizeC1:.*]] = affine.min #[[BOUND4_MAP]](%[[K]])[%[[dC1]]]
-// CHECK: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeC0]], %[[sizeC1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+// CHECK: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [%[[sizeA0]], %[[sizeB1]]] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[stC]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: %[[CAST:.*]] = tensor.cast %[[stD]] : tensor<?x?xf32> to tensor<2x4xf32>
// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[CAST]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
@@ -207,7 +203,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK: #[[BOUND8_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 8, -d0 + s1)>
// CHECK: #[[BOUND16_MAP:.+]] = affine_map<(d0)[s0] -> (16, -d0 + s0)>
// CHECK: #[[X2_MAP:.+]] = affine_map<(d0) -> (d0 * 2)>
-// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s1)>
+// CHECK: #[[INPUT_BOUND:.+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * 2 + s0 - 2, d1 * -2 + s0 + s1 * 2)>
// CHECK: #[[BOUND16_MAP_2:.+]] = affine_map<(d0)[s0, s1] -> (-d0 + s0, 16, -d0 + s1)>
// CHECK: #[[BOUND4_MAP:.+]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
// CHECK: #[[BOUND2_MAP:.+]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
@@ -232,31 +228,26 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[INPUT_N:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[INPUT_H:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[INPUT_W:.+]] = tensor.dim %[[INPUT]], %[[C2]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[INPUT_C:.+]] = tensor.dim %[[INPUT]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_IC:.+]] = tensor.dim %[[FILTER]], %[[C2]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILTER_OC:.+]] = tensor.dim %[[FILTER]], %[[C3]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[FILL_N:.+]] = tensor.dim %[[FILL]], %[[C0]] : tensor<?x?x?x?xf32>
+// CHECK-DAG: %[[INPUT_N:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor<?x?x?x?xf32>
+// CHECK-DAG: %[[INPUT_C:.+]] = tensor.dim %[[INPUT]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_H:.+]] = tensor.dim %[[FILL]], %[[C1]] : tensor<?x?x?x?xf32>
// CHECK-DAG: %[[FILL_W:.+]] = tensor.dim %[[FILL]], %[[C2]] : tensor<?x?x?x?xf32>
-// CHECK-DAG: %[[FILL_C:.+]] = tensor.dim %[[FILL]], %[[C3]] : tensor<?x?x?x?xf32>
// CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %[[ELEM_N]] step %{{.+}} iter_args(%{{.+}} = %[[FILL]])
// CHECK-NEXT: %[[SIZE_ELEM_N:.+]] = affine.min #[[BOUND8_MAP]](%[[IV0]])[%[[ELEM_N]]]
// CHECK-NEXT: %[[SIZE_INPUT_N:.+]] = affine.min #[[BOUND8_MAP_2]](%[[IV0]])[%[[INPUT_N]], %[[ELEM_N]]]
-// CHECK-NEXT: %[[SIZE_ELEM_N_2:.+]] = affine.min #[[BOUND8_MAP_2]](%[[IV0]])[%[[FILL_N]], %[[ELEM_N]]]
// CHECK-NEXT: scf.for %[[IV1:.+]] = %{{.+}} to %[[ELEM_OH]]
// CHECK-NEXT: %[[SIZE_ELEM_OH:.+]] = affine.min #[[BOUND16_MAP]](%[[IV1]])[%[[ELEM_OH]]]
// CHECK-NEXT: %[[OFFSET_OH:.+]] = affine.apply #[[X2_MAP]](%[[IV1]])
-// CHECK-NEXT: %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OH]], %[[IV1]])[%[[FILTER_H]], %[[INPUT_H]]]
+// CHECK-NEXT: %[[SIZE_INPUT_H:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OH]], %[[IV1]])[%[[FILTER_H]], %[[FILL_H]]]
// CHECK-NEXT: %[[SIZE_ELEM_OH_2:.+]] = affine.min #[[BOUND16_MAP_2]](%[[IV1]])[%[[FILL_H]], %[[ELEM_OH]]]
// CHECK-NEXT: scf.for %[[IV2:.+]] = %{{.+}} to %[[ELEM_OW]]
// CHECK-NEXT: %[[SIZE_ELEM_OW:.+]] = affine.min #[[BOUND4_MAP]](%[[IV2]])[%[[ELEM_OW]]]
// CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]]
// CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]])
-// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[INPUT_W]]]
+// CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[SIZE_ELEM_OW]], %[[IV2]])[%[[FILTER_W]], %[[FILL_W]]]
// CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]]
// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
@@ -268,9 +259,8 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
// CHECK-NEXT: %[[SIZE_ELEM_OC_2:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILTER_OC]], %[[ELEM_OC]]]
// CHECK-NEXT: %[[ST_FILTER:.+]] = tensor.extract_slice %[[FILTER]][0, 0, 0, %[[IV3]]]
// CHECK-SAME: [%[[FILTER_H]], %[[FILTER_W]], %[[FILTER_IC]], %[[SIZE_ELEM_OC_2]]]
-// CHECK-NEXT: %[[SIZE_ELEM_OC_3:.+]] = affine.min #[[BOUND2_MAP_2]](%[[IV3]], %[[IV2]])[%[[FILL_C]], %[[ELEM_OC]]]
// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-// CHECK-SAME: [%[[SIZE_ELEM_N_2]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_3]]]
+// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_2]]]
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
// CHECK-SAME: outs(%[[ST_FILL]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-conv-padding.mlir b/mlir/test/Dialect/Linalg/tile-conv-padding.mlir
index 28b18e22d1def..0846c0a77f09e 100644
--- a/mlir/test/Dialect/Linalg/tile-conv-padding.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv-padding.mlir
@@ -23,14 +23,12 @@ func @conv_padding(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>,
// TILE-20000-DAG: %[[C2:.*]] = constant 2 : index
// TILE-20000: %[[B:.*]] = memref.dim %[[ARG1]], %c0
// TILE-20000: scf.for %[[ivI:.*]] = %[[C0]] to %[[B]] step %[[C2]] {
-// TILE-20000: %[[DIM10:.*]] = memref.dim %[[ARG1]], %c0
-// TILE-20000: %[[EXTENT:.*]] = affine.min #[[$minmap]](%[[ivI]])[%[[DIM10]]]
+// TILE-20000: %[[EXTENT:.*]] = affine.min #[[$minmap]](%[[ivI]])[%[[B]]]
// TILE-20000: %[[DIM11:.*]] = memref.dim %[[ARG1]], %c1
// TILE-20000: %[[DIM12:.*]] = memref.dim %[[ARG1]], %c2
// TILE-20000: %[[DIM13:.*]] = memref.dim %[[ARG1]], %c3
// TILE-20000: %[[SUBVIEW1:.*]] = memref.subview %[[ARG1]][%[[ivI]], 0, 0, 0] [%[[EXTENT]], %[[DIM11]], %[[DIM12]], %[[DIM13]]]
-// TILE-20000: %[[DIM20:.*]] = memref.dim %[[ARG2]], %c0
-// TILE-20000: %[[EXTENT:.*]] = affine.min #[[$minmap]](%[[ivI]])[%[[DIM20]]]
+// TILE-20000: %[[EXTENT:.*]] = affine.min #[[$minmap]](%[[ivI]])[%[[B]]]
// TILE-20000: %[[DIM21:.*]] = memref.dim %[[ARG2]], %c1
// TILE-20000: %[[DIM22:.*]] = memref.dim %[[ARG2]], %c2
// TILE-20000: %[[DIM23:.*]] = memref.dim %[[ARG2]], %c3
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
index 530214b8b8d1b..daab946eb48ca 100644
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -1,8 +1,10 @@
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004
// TILE-23004-DAG: #[[$D0x30pS0x10:.*]] = affine_map<(d0) -> (d0 * 30)>
-// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s1)>
+// TILE-23004-DAG: #[[$S0x10p90D0x30pS1:.*]] = affine_map<(d0)[s0, s1] -> (s0 * 10 + 51, d0 * -30 + s0 * 10 + s1 * 30)>
// TILE-23004-DAG: #[[$strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
+// TILE-23004-DAG: #[[$bound_map_2:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
+// TILE-23004-DAG: #[[$bound_map_3:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// TILE-23004-DAG: #[[$bound_map_4:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
func @conv(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg2: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>) {
@@ -26,17 +28,14 @@ func @conv(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %arg1:
// TILE-23004: scf.for %[[ivK:.*]] = %{{.*}} to %[[Q]] step %{{.*}} {
// TILE-23004: %[[Z0_1:.*]] = memref.dim %[[ARG0]], %c0 : memref<?x?x?x?xf32, #[[$strided4D]]>
// TILE-23004: %[[Z1:.*]] = memref.dim %[[ARG0]], %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// TILE-23004: %[[Z2:.*]] = memref.dim %[[ARG0]], %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// TILE-23004: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[ivK]])[%[[Z2]]]
+// TILE-23004: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[ivK]])[%[[Q]]]
// TILE-23004: %[[K:.*]] = memref.dim %[[ARG0]], %c3 : memref<?x?x?x?xf32, #[[$strided4D]]>
// TILE-23004: %[[FilterView:.*]] = memref.subview %{{.*}}[0, 0, %[[ivK]], 0] [%[[Z0_1]], %[[Z1]], %[[szK]], %[[K]]] [1, 1, 1, 1] : memref<?x?x?x?xf32, #[[$strided4D]]> to memref<?x?x?x?xf32, #[[$strided4D]]>
//
// TILE-23004: %[[J1:.*]] = affine.apply #[[$D0x30pS0x10]](%[[ivJ]])
-// TILE-23004: %[[PaddedInput0b:.*]] = memref.dim %[[ARG1]], %c1 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// TILE-23004: %[[I1pStep:.*]] = affine.min #[[$S0x10p90D0x30pS1]](%[[ivJ]])[%[[Z0]], %[[PaddedInput0b]]]
+// TILE-23004: %[[I1pStep:.*]] = affine.min #[[$S0x10p90D0x30pS1]](%[[ivJ]])[%[[Z0]], %[[X0]]]
// TILE-23004: %[[SZ2:.*]] = memref.dim %[[ARG1]], %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
-// TILE-23004: %[[dim3:.*]] = memref.dim %[[ARG1]], %c3
-// TILE-23004: %[[sz3:.*]] = affine.min #[[$bound_map_4]](%[[ivK]])[%[[dim3]]]
+// TILE-23004: %[[sz3:.*]] = affine.min #[[$bound_map_4]](%[[ivK]])[%[[Q]]]
// TILE-23004: %[[InputView:.*]] = memref.subview %{{.*}}[%[[ivI]], %[[J1]], 0, %[[ivK]]] [%{{.*}}, %{{.*}}, %[[SZ2]], %[[sz3]]] [1, 1, 1, 1] : memref<?x?x?x?xf32, #[[$strided4D]]> to memref<?x?x?x?xf32, #[[$strided4D]]>
//
// TILE-23004: %[[X0:.*]] = memref.dim %[[ARG2]], %c2 : memref<?x?x?x?xf32, #[[$strided4D]]>
diff --git a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
index 4c49fec7536a0..50b0c5ca3e1e1 100644
--- a/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-simple-conv.mlir
@@ -1,8 +1,8 @@
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,4" | FileCheck %s
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
-// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s1)>
-// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s1)>
+// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 2, -d0 + s0 + s1)>
+// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (s0 + 3, -d0 + s0 + s1)>
// CHECK-DAG: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// CHECK-DAG: #[[MAP5:.*]] = affine_map<(d0)[s0] -> (4, -d0 + s0)>
@@ -28,21 +28,15 @@ func @conv(%arg0 : memref<?x?x?x?xf32>, %arg1 : memref<?x?x?x?xf32>, %arg2 : mem
// CHECK: scf.for %[[ARG3:.*]] = %[[C0]] to %[[T2]] step %[[C2]]
// CHECK: scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]]
// CHECK: scf.for %[[ARG5:.*]] = %[[C0]] to %[[T4]] step %[[C4]]
-// CHECK: %[[T5:.*]] = memref.dim %[[ARG1]], %[[C0]]
-// CHECK: %[[T6:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T5]]]
-// CHECK: %[[T7:.*]] = memref.dim %[[ARG1]], %[[C1]]
-// CHECK: %[[T8:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T0]], %[[T7]]]
-// CHECK: %[[T9:.*]] = memref.dim %[[ARG1]], %[[C2]]
-// CHECK: %[[T10:.*]] = affine.min #[[MAP2]](%[[ARG5]])[%[[T1]], %[[T9]]]
+// CHECK: %[[T6:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]]]
+// CHECK: %[[T8:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T0]], %[[T3]]]
+// CHECK: %[[T10:.*]] = affine.min #[[MAP2]](%[[ARG5]])[%[[T1]], %[[T4]]]
// CHECK: %[[T11:.*]] = memref.dim %[[ARG1]], %[[C3]]
// CHECK: %[[SV1:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0]
// CHECK-SAME: [%[[T6]], %[[T8]], %[[T10]], %[[T11]]]
-// CHECK: %[[T13:.*]] = memref.dim %[[ARG2]], %[[C0]]
-// CHECK: %[[T14:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T13]]]
-// CHECK: %[[T15:.*]] = memref.dim %[[ARG2]], %[[C1]]
-// CHECK: %[[T16:.*]] = affine.min #[[MAP4]](%[[ARG4]])[%[[T15]]]
-// CHECK: %[[T17:.*]] = memref.dim %[[ARG2]], %[[C2]]
-// CHECK: %[[T18:.*]] = affine.min #[[MAP5]](%[[ARG5]])[%[[T17]]]
+// CHECK: %[[T14:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]]]
+// CHECK: %[[T16:.*]] = affine.min #[[MAP4]](%[[ARG4]])[%[[T3]]]
+// CHECK: %[[T18:.*]] = affine.min #[[MAP5]](%[[ARG5]])[%[[T4]]]
// CHECK: %[[T19:.*]] = memref.dim %[[ARG2]], %[[C3]]
// CHECK: %[[SV2:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]], %[[ARG5]], 0]
// CHECK-SAME: [%[[T14]], %[[T16]], %[[T18]], %[[T19]]]
diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
index 97b17ebb7bdc2..f7074c51a5166 100644
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ b/mlir/test/Dialect/Linalg/tile.mlir
@@ -38,12 +38,10 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]]
+// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-2: %[[localK:.*]] = memref.dim %{{.*}}, %c0
-// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localK]]]
+// TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]]
@@ -54,12 +52,10 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-02: %[[N:.*]] = memref.dim %arg1, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
-// TILE-02: %[[localN:.*]] = memref.dim %{{.*}}, %c1
-// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localN]]]
+// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
-// TILE-02: %[[localK:.*]] = memref.dim %{{.*}}, %c1
-// TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localK]]]
+// TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
// TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
@@ -69,11 +65,9 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-002: %[[ubK:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
// TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
-// TILE-002: %[[localK:.*]] = memref.dim %{{.*}}, %c1
-// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[localK]]]
+// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
// TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-002: %[[localK:.*]] = memref.dim %{{.*}}, %c0
-// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[localK]]]
+// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
// TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
@@ -89,20 +83,14 @@ func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[ubM]] step %{{.*}} {
// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[ubN]] step %{{.*}} {
// TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]]
-// TILE-234: %[[localK:.*]] = memref.dim %{{.*}}, %c1
-// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[localK]]]
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
+// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
// TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-234: %[[localK:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[localK]]]
-// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c1
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]]
+// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]]
-// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c1
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]]
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
//
// TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
@@ -182,12 +170,10 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[localM:.*]] = memref.dim %[[ARG0]], %c0
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]]
+// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-2: %[[localN:.*]] = memref.dim %{{.*}}, %c0
-// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localN]]]
+// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]]
@@ -200,11 +186,9 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
// TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
// TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
-// TILE-02: %[[localN:.*]] = memref.dim %{{.*}}, %c1
-// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localN]]]
+// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
// TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-02: %[[localN:.*]] = memref.dim %{{.*}}, %c0
-// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[localN]]]
+// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
@@ -225,16 +209,12 @@ func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: memref<?
// TILE-234: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
// TILE-234: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
// TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
-// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]]
-// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c1
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]]
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
// TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-234: %[[localN:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[localN]]]
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
-// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]]
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
//
// TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]]
@@ -250,11 +230,9 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
// TILE-2-DAG: %[[C2:.*]] = constant 2 : index
// TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, #[[$strided1D]]>
// TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
-// TILE-2: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]]
+// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
-// TILE-2: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[localM]]]
+// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
@@ -269,11 +247,9 @@ func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf32, of
// TILE-234-DAG: %[[C2:.*]] = constant 2 : index
// TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, #[[$strided1D]]>
// TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
-// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]]
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
// TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
-// TILE-234: %[[localM:.*]] = memref.dim %{{.*}}, %c0
-// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[localM]]]
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
// TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
More information about the Mlir-commits mailing list