[Mlir-commits] [mlir] 0602e8f - [MLIR][Affine] Add parametric tile size support for affine.for tiling
Uday Bondhugula
llvmlistbot at llvm.org
Thu Sep 17 11:13:27 PDT 2020
Author: Navdeep Kumar
Date: 2020-09-17T23:39:14+05:30
New Revision: 0602e8f77f8662c85155b8cf02937a2e71c01e12
URL: https://github.com/llvm/llvm-project/commit/0602e8f77f8662c85155b8cf02937a2e71c01e12
DIFF: https://github.com/llvm/llvm-project/commit/0602e8f77f8662c85155b8cf02937a2e71c01e12.diff
LOG: [MLIR][Affine] Add parametric tile size support for affine.for tiling
Add support to tile affine.for ops with parametric sizes (i.e., SSA
values). Currently supports hyper-rectangular loop nests with constant
lower bounds only. Move methods
- moveLoopBody(*)
- getTileableBands(*)
- checkTilingLegality(*)
- tilePerfectlyNested(*)
- constructTiledIndexSetHyperRect(*)
to allow reuse with constant tile size API. Add a test pass
-test-affine-parametric-tile to test parametric tiling.
Differential Revision: https://reviews.llvm.org/D87353
Added:
mlir/test/Dialect/Affine/loop-tiling-parametric.mlir
mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp
Modified:
mlir/include/mlir/Transforms/LoopUtils.h
mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
mlir/lib/Transforms/Utils/LoopUtils.cpp
mlir/test/lib/Transforms/CMakeLists.txt
mlir/tools/mlir-opt/mlir-opt.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index 5a0d46f5ba57..aaff786fbe2f 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -88,16 +88,28 @@ LLVM_NODISCARD
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
bool unrollPrologueEpilogue = false);
+/// Identify valid and profitable bands of loops to tile. This is currently just
+/// a temporary placeholder to test the mechanics of tiled code generation.
+/// Returns all maximal outermost perfect loop nests to tile.
+void getTileableBands(FuncOp f,
+ std::vector<SmallVector<AffineForOp, 6>> *bands);
+
/// Tiles the specified band of perfectly nested loops creating tile-space loops
-/// and intra-tile loops. A band is a contiguous set of loops. `tiledNest` when
-/// non-null is set to the loops of the tiled nest from outermost to innermost.
-/// Loops in `input` are erased when the tiling is successful.
+/// and intra-tile loops. A band is a contiguous set of loops.
LLVM_NODISCARD
LogicalResult
tilePerfectlyNested(MutableArrayRef<AffineForOp> input,
ArrayRef<unsigned> tileSizes,
SmallVectorImpl<AffineForOp> *tiledNest = nullptr);
+/// Tiles the specified band of perfectly nested loops creating tile-space
+/// loops and intra-tile loops, using SSA values as tiling parameters. A band
+/// is a contiguous set of loops.
+LLVM_NODISCARD
+LogicalResult tilePerfectlyNestedParametric(
+ MutableArrayRef<AffineForOp> input, ArrayRef<Value> tileSizes,
+ SmallVectorImpl<AffineForOp> *tiledNest = nullptr);
+
/// Performs loop interchange on 'forOpA' and 'forOpB'. Requires that 'forOpA'
/// and 'forOpB' are part of a perfectly nested sequence of loops.
void interchangeLoops(AffineForOp forOpA, AffineForOp forOpB);
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
index 5bded917978a..56469482c763 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
@@ -61,278 +61,6 @@ std::unique_ptr<OperationPass<FuncOp>> mlir::createLoopTilingPass() {
return std::make_unique<LoopTiling>();
}
-// Move the loop body of AffineForOp 'src' from 'src' into the specified
-// location in destination's body, ignoring the terminator.
-static inline void moveLoopBody(AffineForOp src, AffineForOp dest,
- Block::iterator loc) {
- auto &insts = src.getBody()->getOperations();
- dest.getBody()->getOperations().splice(loc, insts, insts.begin(),
- std::prev(insts.end()));
-}
-
-// Move the loop body of AffineForOp 'src' from 'src' to the start of dest's
-// body.
-static inline void moveLoopBody(AffineForOp src, AffineForOp dest) {
- moveLoopBody(src, dest, dest.getBody()->begin());
-}
-
-/// Constructs and sets new loop bounds after tiling for the case of
-/// hyper-rectangular index sets, where the bounds of one dimension do not
-/// depend on other dimensions. Bounds of each dimension can thus be treated
-/// independently, and deriving the new bounds is much simpler and faster
-/// than for the case of tiling arbitrary polyhedral shapes.
-static void
-constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
- MutableArrayRef<AffineForOp> newLoops,
- ArrayRef<unsigned> tileSizes) {
- assert(!origLoops.empty());
- assert(origLoops.size() == tileSizes.size());
-
- OpBuilder b(origLoops[0].getOperation());
- unsigned width = origLoops.size();
-
- // Bounds for tile space loops.
- for (unsigned i = 0; i < width; i++) {
- OperandRange newLbOperands = origLoops[i].getLowerBoundOperands();
- OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
- newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
- newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
- newLoops[i].setStep(tileSizes[i]);
- }
- // Bounds for intra-tile loops.
- for (unsigned i = 0; i < width; i++) {
- int64_t largestDiv = getLargestDivisorOfTripCount(origLoops[i]);
- auto mayBeConstantCount = getConstantTripCount(origLoops[i]);
- // The lower bound is just the tile-space loop.
- AffineMap lbMap = b.getDimIdentityMap();
- newLoops[width + i].setLowerBound(
- /*operands=*/newLoops[i].getInductionVar(), lbMap);
-
- // Set the upper bound.
- if (mayBeConstantCount && mayBeConstantCount.getValue() < tileSizes[i]) {
- // Trip count is less than the tile size: upper bound is lower bound +
- // trip count.
- auto ubMap = b.getSingleDimShiftAffineMap(mayBeConstantCount.getValue());
- newLoops[width + i].setUpperBound(
- /*operands=*/newLoops[i].getInductionVar(), ubMap);
- } else if (largestDiv % tileSizes[i] != 0) {
- // Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
- // Construct the upper bound map; the operands are the original operands
- // with 'i' (tile-space loop) appended to it. The new upper bound map is
- // the original one with an additional expression i + tileSize appended.
-
- // Add dim operands from original upper bound.
- SmallVector<Value, 4> ubOperands;
- auto ub = origLoops[i].getUpperBound();
- ubOperands.reserve(ub.getNumOperands() + 1);
- auto origUbMap = ub.getMap();
- for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
- ubOperands.push_back(ub.getOperand(j));
-
- // Add dim operand for new loop upper bound.
- ubOperands.push_back(newLoops[i].getInductionVar());
-
- // Add symbol operands from original upper bound.
- for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
- ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
-
- SmallVector<AffineExpr, 4> boundExprs;
- boundExprs.reserve(1 + origUbMap.getNumResults());
- auto dim = b.getAffineDimExpr(origUbMap.getNumDims());
- // The new upper bound map is the original one with an additional
- // expression i + tileSize appended.
- boundExprs.push_back(dim + tileSizes[i]);
- boundExprs.append(origUbMap.getResults().begin(),
- origUbMap.getResults().end());
- auto ubMap =
- AffineMap::get(origUbMap.getNumDims() + 1, origUbMap.getNumSymbols(),
- boundExprs, b.getContext());
- newLoops[width + i].setUpperBound(/*operands=*/ubOperands, ubMap);
- } else {
- // No need of the min expression.
- auto dim = b.getAffineDimExpr(0);
- auto ubMap = AffineMap::get(1, 0, dim + tileSizes[i]);
- newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
- }
- }
-}
-
-/// This function checks whether hyper-rectangular loop tiling of the nest
-/// represented by `origLoops` is valid. The validity condition is from Irigoin
-/// and Triolet, which states that two tiles cannot depend on each other. We
-/// simplify such condition to just checking whether there is any negative
-/// dependence direction, since we have the prior knowledge that the tiling
-/// results will be hyper-rectangles, which are scheduled in the
-/// lexicographically increasing order on the vector of loop indices. This
-/// function will return failure when any dependence component is negative along
-/// any of `origLoops`.
-static LogicalResult
-checkTilingLegality(MutableArrayRef<mlir::AffineForOp> origLoops) {
- assert(!origLoops.empty() && "no original loops provided");
-
- // We first find out all dependences we intend to check.
- SmallVector<Operation *, 8> loadAndStoreOps;
- origLoops[0].getOperation()->walk([&](Operation *op) {
- if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
- loadAndStoreOps.push_back(op);
- });
-
- unsigned numOps = loadAndStoreOps.size();
- unsigned numLoops = origLoops.size();
- FlatAffineConstraints dependenceConstraints;
- for (unsigned d = 1; d <= numLoops + 1; ++d) {
- for (unsigned i = 0; i < numOps; ++i) {
- Operation *srcOp = loadAndStoreOps[i];
- MemRefAccess srcAccess(srcOp);
- for (unsigned j = 0; j < numOps; ++j) {
- Operation *dstOp = loadAndStoreOps[j];
- MemRefAccess dstAccess(dstOp);
-
- SmallVector<DependenceComponent, 2> depComps;
- dependenceConstraints.reset();
- DependenceResult result = checkMemrefAccessDependence(
- srcAccess, dstAccess, d, &dependenceConstraints, &depComps);
-
- // Skip if there is no dependence in this case.
- if (!hasDependence(result))
- continue;
-
- // Check whether there is any negative direction vector in the
- // dependence components found above, which means that dependence is
- // violated by the default hyper-rect tiling method.
- LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
- "for dependence at depth: "
- << Twine(d) << " between:\n";);
- LLVM_DEBUG(srcAccess.opInst->dump(););
- LLVM_DEBUG(dstAccess.opInst->dump(););
- for (unsigned k = 0, e = depComps.size(); k < e; k++) {
- DependenceComponent depComp = depComps[k];
- if (depComp.lb.hasValue() && depComp.ub.hasValue() &&
- depComp.lb.getValue() < depComp.ub.getValue() &&
- depComp.ub.getValue() < 0) {
- LLVM_DEBUG(llvm::dbgs()
- << "Dependence component lb = "
- << Twine(depComp.lb.getValue())
- << " ub = " << Twine(depComp.ub.getValue())
- << " is negative at depth: " << Twine(d)
- << " and thus violates the legality rule.\n");
- return failure();
- }
- }
- }
- }
- }
-
- return success();
-}
-/// Tiles the specified band of perfectly nested loops creating tile-space loops
-/// and intra-tile loops. A band is a contiguous set of loops.
-// TODO: handle non hyper-rectangular spaces.
-LogicalResult
-mlir::tilePerfectlyNested(MutableArrayRef<AffineForOp> input,
- ArrayRef<unsigned> tileSizes,
- SmallVectorImpl<AffineForOp> *tiledNest) {
- // Check if the supplied for op's are all successively nested.
- assert(!input.empty() && "no loops in input band");
- assert(input.size() == tileSizes.size() && "Too few/many tile sizes");
-
- assert(isPerfectlyNested(input) && "input loops not perfectly nested");
-
- auto origLoops = input;
-
- // Perform tiling legality test.
- if (failed(checkTilingLegality(origLoops)))
- origLoops[0].emitRemark("tiled code is illegal due to dependences");
-
- AffineForOp rootAffineForOp = origLoops[0];
- auto loc = rootAffineForOp.getLoc();
- // Note that width is at least one since band isn't empty.
- unsigned width = input.size();
-
- SmallVector<AffineForOp, 6> tiledLoops(2 * width);
-
- // The outermost among the loops as we add more..
- auto *topLoop = rootAffineForOp.getOperation();
- AffineForOp innermostPointLoop;
-
- // Add intra-tile (or point) loops.
- for (unsigned i = 0; i < width; i++) {
- OpBuilder b(topLoop);
- // Loop bounds will be set later.
- auto pointLoop = b.create<AffineForOp>(loc, 0, 0);
- pointLoop.getBody()->getOperations().splice(
- pointLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
- topLoop);
- tiledLoops[2 * width - 1 - i] = pointLoop;
- topLoop = pointLoop.getOperation();
- if (i == 0)
- innermostPointLoop = pointLoop;
- }
-
- // Add tile space loops;
- for (unsigned i = width; i < 2 * width; i++) {
- OpBuilder b(topLoop);
- // Loop bounds will be set later.
- auto tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
- tileSpaceLoop.getBody()->getOperations().splice(
- tileSpaceLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
- topLoop);
- tiledLoops[2 * width - i - 1] = tileSpaceLoop;
- topLoop = tileSpaceLoop.getOperation();
- }
-
- // Move the loop body of the original nest to the new one.
- moveLoopBody(origLoops.back(), innermostPointLoop);
-
- SmallVector<Value, 8> origLoopIVs;
- extractForInductionVars(input, &origLoopIVs);
-
- FlatAffineConstraints cst;
- SmallVector<Operation *, 8> ops;
- ops.reserve(input.size());
- for (AffineForOp forOp : input)
- ops.push_back(forOp);
- getIndexSet(ops, &cst);
- if (!cst.isHyperRectangular(0, width)) {
- rootAffineForOp.emitError("tiled code generation unimplemented for the "
- "non-hyperrectangular case");
- return failure();
- }
-
- constructTiledIndexSetHyperRect(origLoops, tiledLoops, tileSizes);
-
- // Replace original IVs with intra-tile loop IVs.
- for (unsigned i = 0; i < width; i++)
- origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
-
- // Erase the old loop nest.
- rootAffineForOp.erase();
-
- if (tiledNest)
- *tiledNest = std::move(tiledLoops);
-
- return success();
-}
-
-// Identify valid and profitable bands of loops to tile. This is currently just
-// a temporary placeholder to test the mechanics of tiled code generation.
-// Returns all maximal outermost perfect loop nests to tile.
-static void getTileableBands(FuncOp f,
- std::vector<SmallVector<AffineForOp, 6>> *bands) {
- // Get maximal perfect nest of 'affine.for' insts starting from root
- // (inclusive).
- auto getMaximalPerfectLoopNest = [&](AffineForOp root) {
- SmallVector<AffineForOp, 6> band;
- getPerfectlyNestedLoops(band, root);
- bands->push_back(band);
- };
-
- for (auto &block : f)
- for (auto &op : block)
- if (auto forOp = dyn_cast<AffineForOp>(op))
- getMaximalPerfectLoopNest(forOp);
-}
-
/// Reduces each tile size to the largest divisor of the corresponding trip
/// count (if the trip count is known).
static void adjustToDivisorsOfTripCounts(ArrayRef<AffineForOp> band,
@@ -340,7 +68,7 @@ static void adjustToDivisorsOfTripCounts(ArrayRef<AffineForOp> band,
assert(band.size() == tileSizes->size() && "invalid tile size count");
for (unsigned i = 0, e = band.size(); i < e; i++) {
unsigned &tSizeAdjusted = (*tileSizes)[i];
- auto mayConst = getConstantTripCount(band[i]);
+ Optional<uint64_t> mayConst = getConstantTripCount(band[i]);
if (!mayConst)
continue;
// Adjust the tile size to largest factor of the trip count less than
@@ -379,14 +107,14 @@ void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
tileSizes->resize(band.size());
// The first loop in the band.
- auto rootForOp = band[0];
+ AffineForOp rootForOp = band[0];
(void)rootForOp;
// Obtain memory footprint and set tile sizes so that a tile fits in
// the cache size. This is an approximation with the assumption that the
// footprint increases with the tile size linearly in that dimension (i.e.,
// assumes one-to-one access function).
- auto fp = getMemoryFootprintBytes(band[0], 0);
+ Optional<int64_t> fp = getMemoryFootprintBytes(band[0], 0);
if (!fp) {
// Fill with default tile sizes if footprint is unknown.
std::fill(tileSizes->begin(), tileSizes->end(),
@@ -445,7 +173,7 @@ void LoopTiling::runOnFunction() {
getTileSizes(band, &tileSizes);
if (llvm::DebugFlag) {
auto diag = band[0].emitRemark("using tile sizes [");
- for (auto tSize : tileSizes)
+ for (unsigned tSize : tileSizes)
diag << tSize << ' ';
diag << "]\n";
}
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 7ae45171ddbd..cf79e267fb8a 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -418,10 +418,559 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
return success();
}
-// Collect perfectly nested loops starting from `rootForOps`. Loops are
-// perfectly nested if each loop is the first and only non-terminator operation
-// in the parent loop. Collect at most `maxLoops` loops and append them to
-// `forOps`.
+/// Checks the legality of tiling of a hyper-rectangular loop nest by simply
+/// checking if there is a 'negative' dependence in the memrefs present in
+/// the loop nest. If yes then tiling is invalid.
+static bool
+checkTilingLegalityImpl(MutableArrayRef<mlir::AffineForOp> origLoops) {
+ assert(!origLoops.empty() && "no original loops provided");
+
+ // We first find out all dependences we intend to check.
+ SmallVector<Operation *, 8> loadAndStoreOps;
+ origLoops[0].getOperation()->walk([&](Operation *op) {
+ if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
+ loadAndStoreOps.push_back(op);
+ });
+
+ unsigned numOps = loadAndStoreOps.size();
+ unsigned numLoops = origLoops.size();
+ FlatAffineConstraints dependenceConstraints;
+ for (unsigned d = 1; d <= numLoops + 1; ++d) {
+ for (unsigned i = 0; i < numOps; ++i) {
+ Operation *srcOp = loadAndStoreOps[i];
+ MemRefAccess srcAccess(srcOp);
+ for (unsigned j = 0; j < numOps; ++j) {
+ Operation *dstOp = loadAndStoreOps[j];
+ MemRefAccess dstAccess(dstOp);
+
+ SmallVector<DependenceComponent, 2> depComps;
+ dependenceConstraints.reset();
+ DependenceResult result = checkMemrefAccessDependence(
+ srcAccess, dstAccess, d, &dependenceConstraints, &depComps);
+
+ // Skip if there is no dependence in this case.
+ if (!hasDependence(result))
+ continue;
+
+ // Check whether there is any negative direction vector in the
+ // dependence components found above, which means that dependence is
+ // violated by the default hyper-rect tiling method.
+ LLVM_DEBUG(llvm::dbgs() << "Checking whether tiling legality violated "
+ "for dependence at depth: "
+ << Twine(d) << " between:\n";);
+ LLVM_DEBUG(srcAccess.opInst->dump(););
+ LLVM_DEBUG(dstAccess.opInst->dump(););
+ for (unsigned k = 0, e = depComps.size(); k < e; k++) {
+ DependenceComponent depComp = depComps[k];
+ if (depComp.lb.hasValue() && depComp.ub.hasValue() &&
+ depComp.lb.getValue() < depComp.ub.getValue() &&
+ depComp.ub.getValue() < 0) {
+ LLVM_DEBUG(llvm::dbgs()
+ << "Dependence component lb = "
+ << Twine(depComp.lb.getValue())
+ << " ub = " << Twine(depComp.ub.getValue())
+ << " is negative at depth: " << Twine(d)
+ << " and thus violates the legality rule.\n");
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// Checks whether hyper-rectangular loop tiling of the nest
+/// represented by `origLoops` is valid. The validity condition is from Irigoin
+/// and Triolet, which states that two tiles cannot depend on each other. We
+/// simplify such condition to just checking whether there is any negative
+/// dependence direction, since we have the prior knowledge that the tiling
+/// results will be hyper-rectangles, which are scheduled in the
+/// lexicographically increasing order on the vector of loop indices. This
+/// function will return failure when any dependence component is negative along
+/// any of `origLoops`.
+LogicalResult
+checkTilingLegality(MutableArrayRef<mlir::AffineForOp> origLoops) {
+ return success(checkTilingLegalityImpl(origLoops));
+}
+
+/// Check if the input data is valid and whether tiled code will be legal or not.
+template <typename t>
+void performPreTilingChecks(MutableArrayRef<AffineForOp> input,
+ ArrayRef<t> tileSizes) {
+ // Check if the supplied for op's are all successively nested.
+ assert(!input.empty() && "no loops in input band");
+ assert(input.size() == tileSizes.size() && "Too few/many tile sizes");
+
+ assert(isPerfectlyNested(input) && "input loops not perfectly nested");
+
+ // Perform tiling legality test.
+ if (failed(checkTilingLegality(input)))
+ input[0].emitRemark("tiled code is illegal due to dependences");
+}
+
+/// Move the loop body of AffineForOp 'src' from 'src' into the specified
+/// location in destination's body, ignoring the terminator.
+static void moveLoopBodyImpl(AffineForOp src, AffineForOp dest,
+ Block::iterator loc) {
+ auto &ops = src.getBody()->getOperations();
+ dest.getBody()->getOperations().splice(loc, ops, ops.begin(),
+ std::prev(ops.end()));
+}
+
+/// Move the loop body of AffineForOp 'src' from 'src' to the start of dest
+/// body.
+void moveLoopBody(AffineForOp src, AffineForOp dest) {
+ moveLoopBodyImpl(src, dest, dest.getBody()->begin());
+}
+
+/// Constructs the tiled loop nest, without setting the loop bounds, and moves
+/// the body of the original loop nest to the tiled loop nest.
+void constructTiledLoopNest(MutableArrayRef<AffineForOp> origLoops,
+ AffineForOp rootAffineForOp, unsigned width,
+ MutableArrayRef<AffineForOp> tiledLoops) {
+ Location loc = rootAffineForOp.getLoc();
+
+ // The outermost among the loops as we add more..
+ Operation *topLoop = rootAffineForOp.getOperation();
+ AffineForOp innermostPointLoop;
+
+ // Add intra-tile (or point) loops.
+ for (unsigned i = 0; i < width; i++) {
+ OpBuilder b(topLoop);
+ // Loop bounds will be set later.
+ AffineForOp pointLoop = b.create<AffineForOp>(loc, 0, 0);
+ pointLoop.getBody()->getOperations().splice(
+ pointLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
+ topLoop);
+ tiledLoops[2 * width - 1 - i] = pointLoop;
+ topLoop = pointLoop.getOperation();
+ if (i == 0)
+ innermostPointLoop = pointLoop;
+ }
+
+ // Add tile space loops;
+ for (unsigned i = width; i < 2 * width; i++) {
+ OpBuilder b(topLoop);
+ // Loop bounds will be set later.
+ AffineForOp tileSpaceLoop = b.create<AffineForOp>(loc, 0, 0);
+ tileSpaceLoop.getBody()->getOperations().splice(
+ tileSpaceLoop.getBody()->begin(), topLoop->getBlock()->getOperations(),
+ topLoop);
+ tiledLoops[2 * width - i - 1] = tileSpaceLoop;
+ topLoop = tileSpaceLoop.getOperation();
+ }
+
+ // Move the loop body of the original nest to the new one.
+ moveLoopBody(origLoops.back(), innermostPointLoop);
+}
+
+/// Checks whether a loop nest is hyper-rectangular or not.
+LogicalResult checkIfHyperRectangular(MutableArrayRef<AffineForOp> input,
+ AffineForOp rootAffineForOp,
+ unsigned width) {
+ FlatAffineConstraints cst;
+ SmallVector<Operation *, 8> ops(input.begin(), input.end());
+ getIndexSet(ops, &cst);
+ if (!cst.isHyperRectangular(0, width)) {
+ rootAffineForOp.emitError("tiled code generation unimplemented for the "
+ "non-hyperrectangular case");
+ return failure();
+ }
+ return success();
+}
+
+/// Set lower and upper bounds of intra-tile loops for parametric tiling.
+// TODO: Handle non-constant lower bounds.
+static void setIntraTileBoundsParametric(OpBuilder &b, AffineForOp origLoop,
+ AffineForOp newInterTileLoop,
+ AffineForOp newIntraTileLoop,
+ Value tileSize) {
+ // The lower bound for the intra-tile loop is represented by an affine map
+ // as (%i, %t0)->((%i - %origlb) * %t0 + %origlb). Similarly, the upper bound
+ // for the intra-tile loop is represented by an affine map as (%i, %t0)->((%i
+ // - %origlb) * %t0) + (%t0 * %origLoopStep) + %origlb), where %i is loop IV
+ // of the corresponding inter-tile loop, %t0 is the corresponding tiling
+ // parameter, %origlb is lower bound and %origLoopStep is the loop step of the
+ // corresponding inter-tile loop.
+
+ assert(origLoop.hasConstantLowerBound() &&
+ "expected input loops to have constant lower bound.");
+
+ // Get lower bound of original loop as an affine expression.
+ AffineExpr origLowerBoundExpr;
+ origLowerBoundExpr =
+ b.getAffineConstantExpr(origLoop.getConstantLowerBound());
+
+ // Add dim operands from original lower/upper bound.
+ SmallVector<Value, 4> lbOperands, ubOperands;
+ AffineBound lb = origLoop.getLowerBound();
+ AffineBound ub = origLoop.getUpperBound();
+ lbOperands.reserve(lb.getNumOperands() + 2);
+ ubOperands.reserve(ub.getNumOperands() + 2);
+ AffineMap origLbMap = lb.getMap();
+ AffineMap origUbMap = ub.getMap();
+ for (unsigned j = 0, e = origLbMap.getNumDims(); j < e; ++j)
+ lbOperands.push_back(lb.getOperand(j));
+ for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
+ ubOperands.push_back(ub.getOperand(j));
+
+ // Add a new dim operand in lb/ubOperands corresponding to the origLoop
+ // IV.
+ lbOperands.push_back(newInterTileLoop.getInductionVar());
+ ubOperands.push_back(newInterTileLoop.getInductionVar());
+
+ // Get loop IV as an affine expression for lower/upper bound. Size of
+ // lb/ubOperands is guaranteed to be at least one.
+ AffineExpr lbLoopIvExpr = b.getAffineDimExpr(lbOperands.size() - 1);
+ AffineExpr ubLoopIvExpr = b.getAffineDimExpr(ubOperands.size() - 1);
+
+ // Add symbol operands from original lower/upper bound.
+ for (unsigned j = 0, e = origLbMap.getNumSymbols(); j < e; ++j)
+ lbOperands.push_back(lb.getOperand(origLbMap.getNumDims() + j));
+ for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
+ ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
+
+ // Add a new symbol operand which is the tile size for this loop.
+ lbOperands.push_back(tileSize);
+ ubOperands.push_back(tileSize);
+
+ SmallVector<AffineExpr, 4> lbBoundExprs;
+ SmallVector<AffineExpr, 4> ubBoundExprs;
+ lbBoundExprs.reserve(origLbMap.getNumResults());
+ ubBoundExprs.reserve(origUbMap.getNumResults());
+
+ // Get tiling parameter as an affine expression for lb/ub.
+ AffineExpr lbTileParameter = b.getAffineSymbolExpr(origLbMap.getNumSymbols());
+ AffineExpr ubTileParameter = b.getAffineSymbolExpr(origUbMap.getNumSymbols());
+
+ // Insert lb as inter-tile ((loop IV - origlb) * tilingParameter) + origlb.
+ lbBoundExprs.push_back(
+ ((lbLoopIvExpr - origLowerBoundExpr) * lbTileParameter) +
+ origLowerBoundExpr);
+
+ // Get the origLoopStep as an affine expression.
+ AffineExpr origLoopStep = b.getAffineConstantExpr(origLoop.getStep());
+
+ // Insert ub as inter-tile ((loop IV - origlb) * tilingParameter) +
+ // (tilingParameter * origLoopStep) + origlb.
+ ubBoundExprs.push_back(
+ ((ubLoopIvExpr - origLowerBoundExpr) * ubTileParameter) +
+ (ubTileParameter * origLoopStep) + origLowerBoundExpr);
+
+ ubBoundExprs.append(origUbMap.getResults().begin(),
+ origUbMap.getResults().end());
+
+ AffineMap lbMap =
+ AffineMap::get(origLbMap.getNumDims() + 1, origLbMap.getNumSymbols() + 1,
+ lbBoundExprs, b.getContext());
+ newIntraTileLoop.setLowerBound(lbOperands, lbMap);
+
+ AffineMap ubMap =
+ AffineMap::get(origUbMap.getNumDims() + 1, origUbMap.getNumSymbols() + 1,
+ ubBoundExprs, b.getContext());
+ newIntraTileLoop.setUpperBound(ubOperands, ubMap);
+
+ // Original loop step must be preserved.
+ newIntraTileLoop.setStep(origLoop.getStep());
+}
+
+/// Set lower and upper bounds of inter-tile loops for parametric tiling.
+// TODO: Handle non-constant lower bounds.
+static void setInterTileBoundsParametric(OpBuilder &b, AffineForOp origLoop,
+ AffineForOp newLoop, Value tileSize) {
+ OperandRange newLbOperands = origLoop.getLowerBoundOperands();
+
+ // The lower bounds for inter-tile loops are the same as the corresponding lower
+ // bounds of original loops.
+ newLoop.setLowerBound(newLbOperands, origLoop.getLowerBoundMap());
+
+ // The new upper bound map for inter-tile loops, assuming constant lower
+ // bounds, are now originalLowerBound + ceildiv((originalUpperBound -
+ // originalLowerBound), tiling parameter); where the tiling parameter is the
+ // respective tile size for that loop. For e.g. if the original ubmap was
+ // ()->(1024), the new map will be
+ // ()[s0]->(ceildiv((1024 - lb), s0)), where s0 is the tiling parameter.
+ // Therefore a new symbol operand is inserted in the map and the result
+ // expression is overwritten.
+
+ assert(origLoop.hasConstantLowerBound() &&
+ "expected input loops to have constant lower bound.");
+
+ // Get lower bound of original loop as an affine expression.
+ AffineExpr origLowerBoundExpr;
+ origLowerBoundExpr =
+ b.getAffineConstantExpr(origLoop.getConstantLowerBound());
+
+ // Add dim operands from original upper bound.
+ SmallVector<Value, 4> ubOperands;
+ AffineBound ub = origLoop.getUpperBound();
+ ubOperands.reserve(ub.getNumOperands() + 1);
+ AffineMap origUbMap = ub.getMap();
+ for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
+ ubOperands.push_back(ub.getOperand(j));
+
+ // Add symbol operands from original upper bound.
+ for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
+ ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
+
+ // Add a new symbol operand which is the tile size for this loop.
+ ubOperands.push_back(tileSize);
+
+ // Get tiling parameter as an affine expression.
+ AffineExpr tileParameter = b.getAffineSymbolExpr(origUbMap.getNumSymbols());
+
+ SmallVector<AffineExpr, 4> boundExprs;
+ boundExprs.reserve(origUbMap.getNumResults());
+ int64_t origUpperBound;
+ AffineExpr origUpperBoundExpr;
+
+ // If upper bound for the original loop is constant, then the constant can
+ // be obtained as an affine expression straight away.
+ if (origLoop.hasConstantUpperBound()) {
+ origUpperBound = origLoop.getConstantUpperBound();
+
+ // Get original constant upper bound as an affine expression.
+ origUpperBoundExpr = b.getAffineConstantExpr(origUpperBound);
+
+ // Insert the bound as originalLowerBound + ceildiv((originalUpperBound -
+ // originalLowerBound), tilingParameter).
+ boundExprs.push_back(
+ origLowerBoundExpr +
+ (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
+ } else {
+ // If upper bound for the original loop is not constant then two cases
+ // are possible, although their handling is the same, 1.) The result of
+ // ubmap has only one result expression. For e.g.
+ // affine.for %i = 5 to %ub
+ //
+ // A symbol operand is added which represents the tiling parameter. The
+ // new loop bounds here will be like ()[s0, s1] -> ((s0 - 5) ceildiv s1 + 5)
+ // where 's0' is the original upper bound and 's1' is the tiling
+ // parameter. 2.) When ubMap has more than one result expression. For e.g.
+ // #map0 = affine_map<()[s0, s1] -> (s0, s1)
+ // affine.for %i = 5 to min #map0()[%s0, %s1]
+ //
+ // A symbol operand is added which represents the tiling parameter. The
+ // new loop bounds will be like ()[s0, s1, s2] -> ((s0 - 5) ceildiv s2 + 5,
+ // (s1 -5) ceildiv s2 + 5), where s2 is the tiling parameter.
+
+ // Insert the bounds as originalLowerBound + ceildiv((originalUpperBound -
+ // originalLowerBound), tilingParameter).
+ for (AffineExpr origUpperBoundExpr : origUbMap.getResults())
+ boundExprs.push_back(
+ origLowerBoundExpr +
+ (origUpperBoundExpr - origLowerBoundExpr).ceilDiv(tileParameter));
+ }
+
+ AffineMap ubMap =
+ AffineMap::get(origUbMap.getNumDims(), origUbMap.getNumSymbols() + 1,
+ boundExprs, b.getContext());
+ newLoop.setUpperBound(ubOperands, ubMap);
+
+ // Original loop step must be preserved.
+ newLoop.setStep(origLoop.getStep());
+}
+
+/// Constructs and sets new loop bounds after tiling for the case of
+/// hyper-rectangular index sets, where the bounds of one dimension do not
+/// depend on other dimensions and tiling parameters are captured from SSA
+/// values. Bounds of each dimension can thus be treated independently,
+/// and deriving the new bounds is much simpler and faster than for the case of
+/// tiling arbitrary polyhedral shapes.
+///
+/// `newLoops` is expected to hold 2 * width loops: the first `width` entries
+/// are the inter-tile (tile-space) loops and the next `width` entries are the
+/// corresponding intra-tile (point) loops, as evidenced by the
+/// `newLoops[i + width]` indexing below. `tileSizes` holds one SSA tiling
+/// parameter per loop in the band.
+static void constructParametricallyTiledIndexSetHyperRect(
+    MutableArrayRef<AffineForOp> origLoops,
+    MutableArrayRef<AffineForOp> newLoops, ArrayRef<Value> tileSizes) {
+  assert(!origLoops.empty() && "expected atleast one loop in band");
+  assert(origLoops.size() == tileSizes.size() &&
+         "expected tiling parameter for each loop in band.");
+
+  OpBuilder b(origLoops[0].getOperation());
+  unsigned width = origLoops.size();
+
+  // Set bounds for tile space loops.
+  for (unsigned i = 0; i < width; ++i) {
+    setInterTileBoundsParametric(b, origLoops[i], newLoops[i], tileSizes[i]);
+  }
+
+  // Set bounds for intra-tile loops. Each intra-tile loop newLoops[i + width]
+  // is paired with its enclosing tile-space loop newLoops[i].
+  for (unsigned i = 0; i < width; ++i) {
+    setIntraTileBoundsParametric(b, origLoops[i], newLoops[i],
+                                 newLoops[i + width], tileSizes[i]);
+  }
+}
+
+/// Constructs and sets new loop bounds after tiling for the case of
+/// hyper-rectangular index sets, where the bounds of one dimension do not
+/// depend on other dimensions. Bounds of each dimension can thus be treated
+/// independently, and deriving the new bounds is much simpler and faster
+/// than for the case of tiling arbitrary polyhedral shapes.
+///
+/// `newLoops` holds 2 * width loops: the first `width` are the tile-space
+/// loops, the next `width` the intra-tile loops (see the
+/// `newLoops[width + i]` indexing below). Tile sizes here are compile-time
+/// constants, unlike the parametric variant above.
+static void
+constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
+                                MutableArrayRef<AffineForOp> newLoops,
+                                ArrayRef<unsigned> tileSizes) {
+  assert(!origLoops.empty());
+  assert(origLoops.size() == tileSizes.size());
+
+  OpBuilder b(origLoops[0].getOperation());
+  unsigned width = origLoops.size();
+
+  // Bounds for tile space loops. The tile-space loop reuses the original
+  // loop's bounds and strides by the tile size.
+  for (unsigned i = 0; i < width; i++) {
+    OperandRange newLbOperands = origLoops[i].getLowerBoundOperands();
+    OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
+    newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
+    newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
+    newLoops[i].setStep(tileSizes[i]);
+  }
+  // Bounds for intra-tile loops. Three cases are distinguished below based on
+  // whether a 'min' with the original upper bound is needed.
+  for (unsigned i = 0; i < width; i++) {
+    int64_t largestDiv = getLargestDivisorOfTripCount(origLoops[i]);
+    Optional<uint64_t> mayBeConstantCount = getConstantTripCount(origLoops[i]);
+    // The lower bound is just the tile-space loop.
+    AffineMap lbMap = b.getDimIdentityMap();
+    newLoops[width + i].setLowerBound(
+        /*operands=*/newLoops[i].getInductionVar(), lbMap);
+
+    // Set the upper bound.
+    if (mayBeConstantCount && mayBeConstantCount.getValue() < tileSizes[i]) {
+      // Trip count is less than the tile size: upper bound is lower bound +
+      // trip count.
+      AffineMap ubMap =
+          b.getSingleDimShiftAffineMap(mayBeConstantCount.getValue());
+      newLoops[width + i].setUpperBound(
+          /*operands=*/newLoops[i].getInductionVar(), ubMap);
+    } else if (largestDiv % tileSizes[i] != 0) {
+      // The tile size does not divide the largest known divisor of the trip
+      // count, so the last tile may be partial and a 'min' with the original
+      // upper bound is required.
+      // Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
+      // Construct the upper bound map; the operands are the original operands
+      // with 'i' (tile-space loop) appended to it. The new upper bound map is
+      // the original one with an additional expression i + tileSize appended.
+
+      // Add dim operands from original upper bound.
+      SmallVector<Value, 4> ubOperands;
+      AffineBound ub = origLoops[i].getUpperBound();
+      ubOperands.reserve(ub.getNumOperands() + 1);
+      AffineMap origUbMap = ub.getMap();
+      for (unsigned j = 0, e = origUbMap.getNumDims(); j < e; ++j)
+        ubOperands.push_back(ub.getOperand(j));
+
+      // Add dim operand for new loop upper bound.
+      ubOperands.push_back(newLoops[i].getInductionVar());
+
+      // Add symbol operands from original upper bound.
+      for (unsigned j = 0, e = origUbMap.getNumSymbols(); j < e; ++j)
+        ubOperands.push_back(ub.getOperand(origUbMap.getNumDims() + j));
+
+      SmallVector<AffineExpr, 4> boundExprs;
+      boundExprs.reserve(1 + origUbMap.getNumResults());
+      AffineExpr dim = b.getAffineDimExpr(origUbMap.getNumDims());
+      // The new upper bound map is the original one with an additional
+      // expression i + tileSize appended.
+      boundExprs.push_back(dim + tileSizes[i]);
+      boundExprs.append(origUbMap.getResults().begin(),
+                        origUbMap.getResults().end());
+      AffineMap ubMap =
+          AffineMap::get(origUbMap.getNumDims() + 1, origUbMap.getNumSymbols(),
+                         boundExprs, b.getContext());
+      newLoops[width + i].setUpperBound(/*operands=*/ubOperands, ubMap);
+    } else {
+      // No need of the min expression: the tile size divides the trip count,
+      // so every tile is full.
+      AffineExpr dim = b.getAffineDimExpr(0);
+      AffineMap ubMap = AffineMap::get(1, 0, dim + tileSizes[i]);
+      newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
+    }
+  }
+}
+
+/// Tiles the specified band of perfectly nested loops creating tile-space loops
+/// and intra-tile loops. A band is a contiguous set of loops.
+///
+/// On success, the original loop nest is erased and replaced by a 2x-deep
+/// tiled nest; if `tiledNest` is non-null it receives the new loops
+/// (tile-space loops first, then intra-tile loops).
+// TODO: handle non hyper-rectangular spaces.
+LogicalResult
+mlir::tilePerfectlyNested(MutableArrayRef<AffineForOp> input,
+                          ArrayRef<unsigned> tileSizes,
+                          SmallVectorImpl<AffineForOp> *tiledNest) {
+  // NOTE(review): the result of performPreTilingChecks (if any) is not
+  // checked here — confirm it reports failures internally (e.g. via asserts).
+  performPreTilingChecks(input, tileSizes);
+
+  MutableArrayRef<AffineForOp> origLoops = input;
+  AffineForOp rootAffineForOp = origLoops[0];
+  // Note that width is at least one since band isn't empty.
+  unsigned width = input.size();
+  SmallVector<AffineForOp, 6> tiledLoops(2 * width);
+
+  // Construct a tiled loop nest without setting their bounds. Bounds are
+  // set later.
+  constructTiledLoopNest(origLoops, rootAffineForOp, width, tiledLoops);
+
+  SmallVector<Value, 8> origLoopIVs;
+  extractForInductionVars(input, &origLoopIVs);
+
+  // NOTE(review): this check runs after constructTiledLoopNest; on failure
+  // the freshly constructed (bound-less) tiled loops appear to be left in the
+  // IR without cleanup — TODO confirm and consider checking before
+  // construction.
+  if (failed(checkIfHyperRectangular(input, rootAffineForOp, width)))
+    return failure();
+
+  // Set loop bounds for the tiled loop nest.
+  constructTiledIndexSetHyperRect(origLoops, tiledLoops, tileSizes);
+
+  // Replace original IVs with intra-tile loop IVs.
+  for (unsigned i = 0; i < width; i++)
+    origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
+
+  // Erase the old loop nest.
+  rootAffineForOp.erase();
+
+  if (tiledNest)
+    *tiledNest = std::move(tiledLoops);
+
+  return success();
+}
+
+/// Tiles the specified band of perfectly nested loops creating tile-space
+/// loops and intra-tile loops, using SSA values as tiling parameters. A band
+/// is a contiguous set of loops.
+///
+/// Mirrors tilePerfectlyNested above, differing only in taking SSA `Value`
+/// tile sizes and calling the parametric index-set construction.
+// TODO: handle non hyper-rectangular spaces.
+LogicalResult
+mlir::tilePerfectlyNestedParametric(MutableArrayRef<AffineForOp> input,
+                                    ArrayRef<Value> tileSizes,
+                                    SmallVectorImpl<AffineForOp> *tiledNest) {
+  // NOTE(review): the result of performPreTilingChecks (if any) is not
+  // checked here — confirm it reports failures internally (e.g. via asserts).
+  performPreTilingChecks(input, tileSizes);
+
+  MutableArrayRef<AffineForOp> origLoops = input;
+  AffineForOp rootAffineForOp = origLoops[0];
+  // Note that width is at least one since band isn't empty.
+  unsigned width = input.size();
+  SmallVector<AffineForOp, 6> tiledLoops(2 * width);
+
+  // Construct a tiled loop nest without setting their bounds. Bounds are
+  // set later.
+  constructTiledLoopNest(origLoops, rootAffineForOp, width, tiledLoops);
+
+  SmallVector<Value, 8> origLoopIVs;
+  extractForInductionVars(input, &origLoopIVs);
+
+  // NOTE(review): as in tilePerfectlyNested, this check runs after the tiled
+  // loop nest is constructed; on failure the constructed loops appear to be
+  // left in the IR — TODO confirm.
+  if (failed(checkIfHyperRectangular(input, rootAffineForOp, width)))
+    return failure();
+
+  // Set loop bounds for the tiled loop nest.
+  constructParametricallyTiledIndexSetHyperRect(origLoops, tiledLoops,
+                                                tileSizes);
+
+  // Replace original IVs with intra-tile loop IVs.
+  for (unsigned i = 0; i < width; i++)
+    origLoopIVs[i].replaceAllUsesWith(tiledLoops[i + width].getInductionVar());
+
+  // Erase the old loop nest.
+  rootAffineForOp.erase();
+
+  if (tiledNest)
+    *tiledNest = std::move(tiledLoops);
+
+  return success();
+}
+
+/// Collect perfectly nested loops starting from `rootForOps`. Loops are
+/// perfectly nested if each loop is the first and only non-terminator operation
+/// in the parent loop. Collect at most `maxLoops` loops and append them to
+/// `forOps`.
template <typename T>
static void getPerfectlyNestedLoopsImpl(
SmallVectorImpl<T> &forOps, T rootForOp,
@@ -452,6 +1001,20 @@ void mlir::getPerfectlyNestedLoops(SmallVectorImpl<scf::ForOp> &nestedLoops,
getPerfectlyNestedLoopsImpl(nestedLoops, root);
}
+/// Identify valid and profitable bands of loops to tile. This is currently just
+/// a temporary placeholder to test the mechanics of tiled code generation.
+/// Returns all maximal outermost perfect loop nests to tile.
+///
+/// NOTE(review): despite "valid and profitable" above, no filtering is
+/// performed here — every top-level 'affine.for' nest in the function is
+/// appended unconditionally.
+void mlir::getTileableBands(FuncOp f,
+                            std::vector<SmallVector<AffineForOp, 6>> *bands) {
+  // Get maximal perfect nest of 'affine.for' insts starting from root
+  // (inclusive).
+  for (AffineForOp forOp : f.getOps<AffineForOp>()) {
+    SmallVector<AffineForOp, 6> band;
+    getPerfectlyNestedLoops(band, forOp);
+    bands->push_back(band);
+  }
+}
+
/// Unrolls this loop completely.
LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
diff --git a/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir b/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir
new file mode 100644
index 000000000000..5e9bc4a884c2
--- /dev/null
+++ b/mlir/test/Dialect/Affine/loop-tiling-parametric.mlir
@@ -0,0 +1,275 @@
+// RUN: mlir-opt %s -split-input-file -test-affine-parametric-tile | FileCheck %s
+// Test cases to test the utility introduced to tile affine for loops using
+// SSA values as tiling parameters (tile sizes). The tile sizes are expected
+// to be passed as input arguments (before any other argument) to the function
+// enclosing the loop nest. Currently hyper-rectangular loop nests with constant
+// lower bounds are supported.
+
+// -----
+
+// CHECK-DAG: [[LBI:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0, 256)>
+// CHECK-DAG: [[UBI1:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0, 512)>
+// CHECK-DAG: [[UBI2:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0, 1024)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0] -> (256 ceildiv s0)>
+// CHECK-DAG: [[UBO1:#map[0-9]+]] = affine_map<()[s0] -> (512 ceildiv s0)>
+// CHECK-DAG: [[UBO2:#map[0-9]+]] = affine_map<()[s0] -> (1024 ceildiv s0)>
+
+// CHECK: func @loop_tiling_3d([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index)
+// CHECK-NEXT: affine.for [[ARG3:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG0]]
+// CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO1]](){{.*}}[[ARG1]]
+// CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO2]](){{.*}}[[ARG2]]
+// CHECK-NEXT: affine.for %[[I:.*]] = [[LBI]]{{.*}}[[ARG3]]{{.*}}[[ARG0]]{{.*}} to min [[UBI0]]{{.*}}[[ARG3]]{{.*}}[[ARG0]]
+// CHECK-NEXT: affine.for %[[J:.*]] = [[LBI]]{{.*}}[[ARG4]]{{.*}}[[ARG1]]{{.*}} to min [[UBI1]]{{.*}}[[ARG4]]{{.*}}[[ARG1]]
+// CHECK-NEXT: affine.for %[[K:.*]] = [[LBI]]{{.*}}[[ARG5]]{{.*}}[[ARG2]]{{.*}} to min [[UBI2]]{{.*}}[[ARG5]]{{.*}}[[ARG2]]
+// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
+func @loop_tiling_3d(%t0 : index, %t1 : index, %t2 : index) {
+ affine.for %i = 0 to 256 {
+ affine.for %j = 0 to 512 {
+ affine.for %k = 0 to 1024 {
+ "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+ }
+ }
+ }
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0 * 4, 256)>
+// CHECK-DAG: [[UBI1:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0 * 3, 512)>
+// CHECK-DAG: [[UBI2:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0 * 2, 1024)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0] -> (256 ceildiv s0)>
+// CHECK-DAG: [[UBO1:#map[0-9]+]] = affine_map<()[s0] -> (512 ceildiv s0)>
+// CHECK-DAG: [[UBO2:#map[0-9]+]] = affine_map<()[s0] -> (1024 ceildiv s0)>
+
+// CHECK: func @loop_tiling_non_unit_step([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index)
+// CHECK-NEXT: affine.for [[ARG3:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG0]]{{.*}}step 4
+// CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO1]](){{.*}}[[ARG1]]{{.*}} step 3
+// CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO2]](){{.*}}[[ARG2]]{{.*}} step 2
+// CHECK-NEXT: affine.for %[[I:.*]] = [[LBI]]{{.*}}[[ARG3]]{{.*}}[[ARG0]]{{.*}} to min [[UBI0]]{{.*}}[[ARG3]]{{.*}}[[ARG0]]{{.*}} step 4
+// CHECK-NEXT: affine.for %[[J:.*]] = [[LBI]]{{.*}}[[ARG4]]{{.*}}[[ARG1]]{{.*}} to min [[UBI1]]{{.*}}[[ARG4]]{{.*}}[[ARG1]]{{.*}} step 3
+// CHECK-NEXT: affine.for %[[K:.*]] = [[LBI]]{{.*}}[[ARG5]]{{.*}}[[ARG2]]{{.*}} to min [[UBI2]]{{.*}}[[ARG5]]{{.*}}[[ARG2]]{{.*}} step 2
+// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]])
+func @loop_tiling_non_unit_step(%t0: index, %t1: index, %t2: index){
+ affine.for %i = 0 to 256 step 4 {
+ affine.for %j = 0 to 512 step 3 {
+ affine.for %k = 0 to 1024 step 2 {
+ "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+ }
+ }
+ }
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0, s1, s2] -> (d0 * s2 + s2, s0, 4096 floordiv s1)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0, s1, s2] -> (s0 ceildiv s2, (4096 floordiv s1) ceildiv s2)>
+
+// CHECK: func @tile_loop_with_div_in_upper_bound([[ARG0:%arg[0-9]+]]: index, %{{.*}}: memref<?xi32>, %{{.*}}: index, %{{.*}}: index)
+#ub = affine_map<()[s0, s1] -> (s0, 4096 floordiv s1)>
+func @tile_loop_with_div_in_upper_bound(%t5 : index, %A : memref<? x i32>, %L : index, %U : index) {
+ %c0 = constant 0 : index
+ %M = dim %A, %c0 : memref<? x i32>
+ affine.for %i = 0 to min #ub()[%M, %U] {
+ addi %i, %i : index
+ }
+ // CHECK: affine.for [[ARG1:%arg[0-9]+]] = 0 to min [[UBO0]]()[%{{.*}}, %{{.*}}, [[ARG0]]]
+ // CHECK-NEXT: affine.for %[[I:.*]] = [[LBI0]]([[ARG1]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]({{.*}})[{{.*}}, {{.*}}, [[ARG0]]]
+ // CHECK-NEXT: addi %[[I]], %[[I]]
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0, s1, s2] -> (d0 * s2 + s2 * 4, s0, 4096 floordiv s1)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0, s1, s2] -> (s0 ceildiv s2, (4096 floordiv s1) ceildiv s2)>
+
+// CHECK: func @tile_loop_with_div_in_upper_bound_non_unit_step([[ARG0:%arg[0-9]+]]: index, %{{.*}}: memref<?xi32>, %{{.*}}: index, %{{.*}}: index)
+#ub = affine_map<()[s0, s1] -> (s0, 4096 floordiv s1)>
+func @tile_loop_with_div_in_upper_bound_non_unit_step(%t5 : index, %A : memref<? x i32>, %L : index, %U : index) {
+ %c0 = constant 0 : index
+ %M = dim %A, %c0 : memref<? x i32>
+ affine.for %i = 0 to min #ub()[%M, %U] step 4 {
+ addi %i, %i : index
+ }
+ // CHECK: affine.for [[ARG1:%arg[0-9]+]] = 0 to min [[UBO0]]()[%{{.*}}, %{{.*}}, [[ARG0]]]{{.*}} step 4{{.*}}
+ // CHECK-NEXT: affine.for %[[I:.*]] = [[LBI0]]([[ARG1]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]({{.*}})[{{.*}}, {{.*}}, [[ARG0]]]{{.*}} step 4{{.*}}
+ // CHECK-NEXT: addi %[[I]], %[[I]]
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> ((d0 - 8) * s0 + 8)>
+// CHECK-DAG: [[UBI2:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> ((d0 - 8) * s1 + s1 * 4 + 8, s0 + 16)>
+// CHECK-DAG: [[UBI1:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> ((d0 - 8) * s1 + s1 + 8, s0 + 16)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> ((d0 - 8) * s0 + s0 + 8, 256)>
+// CHECK-DAG: [[UBO1:#map[0-9]+]] = affine_map<()[s0, s1] -> ((s0 + 8) ceildiv s1 + 8)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0] -> (248 ceildiv s0 + 8)>
+
+// CHECK: func @tile_loop_with_non_zero_lb([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index, %{{.*}}: index)
+// CHECK-NEXT: affine.for [[ARG3:%arg[0-9+]]] = 8 to [[UBO0]]{{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for [[ARG4:%arg[0-9+]]] = 8 to [[UBO1]]{{.*}}[[ARG1]]{{.*}}
+// CHECK-NEXT: affine.for [[ARG5:%arg[0-9+]]] = 8 to [[UBO1]]{{.*}}[[ARG2]]{{.*}} step 4
+// CHECK-NEXT: affine.for %[[I:.*]] = [[LBI0]]([[ARG3]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]([[ARG3]]){{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for %[[J:.*]] = [[LBI0]]([[ARG4]]){{.*}}[[ARG1]]{{.*}} to min [[UBI1]]([[ARG4]]){{.*}}[[ARG1]]{{.*}}
+// CHECK-NEXT: affine.for %[[K:.*]] = [[LBI0]]([[ARG5]]){{.*}}[[ARG2]]{{.*}} to min [[UBI2]]([[ARG5]]){{.*}}[[ARG2]]{{.*}}step 4{{.*}}
+// CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) : (index, index, index) -> ()
+#ubi = affine_map<()[s0] -> (s0 + 16)>
+func @tile_loop_with_non_zero_lb(%t0: index, %t1: index, %t2: index, %U: index){
+ affine.for %i = 8 to 256 {
+ affine.for %j = 8 to #ubi()[%U] {
+ affine.for %k = 8 to #ubi()[%U] step 4 {
+ "test.foo"(%i, %j, %k) : (index, index, index) -> ()
+ }
+ }
+ }
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0, 256)>
+// CHECK-DAG: [[UBI1:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0 + s0, 250)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0] -> (256 ceildiv s0)>
+// CHECK-DAG: [[UBO1:#map[0-9]+]] = affine_map<()[s0] -> (250 ceildiv s0)>
+
+// CHECK: func @simple_matmul([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index{{.*}})
+// CHECK-NEXT: affine.for [[ARG3:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG1]]{{.*}}
+// CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO1]](){{.*}}[[ARG2]]{{.*}}
+// CHECK-NEXT: affine.for %[[I:.*]] = [[LBI]]{{.*}}[[ARG3]]{{.*}}[[ARG0]]{{.*}} to min [[UBI0]]{{.*}}[[ARG3]]{{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for %[[J:.*]] = [[LBI]]{{.*}}[[ARG4]]{{.*}}[[ARG1]]{{.*}} to min [[UBI0]]{{.*}}[[ARG4]]{{.*}}[[ARG1]]{{.*}}
+// CHECK-NEXT: affine.for %[[K:.*]] = [[LBI]]{{.*}}[[ARG5]]{{.*}}[[ARG2]]{{.*}} to min [[UBI1]]{{.*}}[[ARG5]]{{.*}}[[ARG2]]{{.*}}
+// CHECK-NEXT: affine.load %{{.*}}[%[[I]], %[[K]]]
+// CHECK-NEXT: affine.load %{{.*}}[%[[K]], %[[J]]]
+// CHECK-NEXT: affine.load %{{.*}}[%[[I]], %[[J]]]
+// CHECK-NEXT: mulf %{{.*}}
+// CHECK-NEXT: addf %{{.*}}
+// CHECK-NEXT: affine.store %{{.*}}[%[[I]], %[[J]]]
+func @simple_matmul(%t6 : index, %t7 : index, %t8 : index, %arg0: memref<256x256xvector<64xf32>>, %arg1: memref<256x256xvector<64xf32>>, %arg2: memref<256x256xvector<64xf32>>) -> memref<256x256xvector<64xf32>> {
+ affine.for %i = 0 to 256 {
+ affine.for %j = 0 to 256 {
+ affine.for %k = 0 to 250 {
+ %l = affine.load %arg0[%i, %k] : memref<256x256xvector<64xf32>>
+ %r = affine.load %arg1[%k, %j] : memref<256x256xvector<64xf32>>
+ %o = affine.load %arg2[%i, %j] : memref<256x256xvector<64xf32>>
+ %m = mulf %l, %r : vector<64xf32>
+ %a = addf %o, %m : vector<64xf32>
+ affine.store %a, %arg2[%i, %j] : memref<256x256xvector<64xf32>>
+ }
+ }
+ }
+ return %arg2 : memref<256x256xvector<64xf32>>
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s1, s0)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)>
+
+// CHECK: func @tile_with_symbolic_loop_upper_bounds([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index{{.*}}){{.*}}
+// CHECK: affine.for [[ARG2:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for [[ARG3:%arg[0-9]+]] = 0 to [[UBO0]](){{.*}}[[ARG1]]{{.*}}
+// CHECK-NEXT: affine.for %[[I0:.*]] = [[LBI0]]{{.*}}[[ARG2]]{{.*}}[[ARG0]]{{.*}} to min [[UBI0]]{{.*}}[[ARG2]]{{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for %[[I1:.*]] = [[LBI0]]{{.*}}[[ARG3]]{{.*}}[[ARG1]]{{.*}} to min [[UBI0]]{{.*}}[[ARG3]]{{.*}}[[ARG1]]{{.*}}
+// CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[I0]], %[[I1]]] : memref<?x?xf32>
+// CHECK-NEXT: affine.for %[[I2:.*]] = 0 to %{{.*}} {
+// CHECK-NEXT: affine.load %{{.*}}%[[I0]], %[[I2]]
+// CHECK-NEXT: affine.load %{{.*}}%[[I2]], %[[I1]]
+// CHECK-NEXT: mulf
+// CHECK-NEXT: affine.load %{{.*}}%[[I0]], %[[I1]]
+// CHECK-NEXT: addf
+// CHECK-NEXT: affine.store %{{.*}}%[[I0]], %[[I1]]
+func @tile_with_symbolic_loop_upper_bounds(%t9 : index, %t10: index, %arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) {
+ %cst = constant 0.000000e+00 : f32
+ %c0 = constant 0 : index
+ %0 = dim %arg0, %c0 : memref<?x?xf32>
+ affine.for %i0 = 0 to %0 {
+ affine.for %i1 = 0 to %0 {
+ affine.store %cst, %arg2[%i0, %i1] : memref<?x?xf32>
+ affine.for %i2 = 0 to %0 {
+ %1 = affine.load %arg0[%i0, %i2] : memref<?x?xf32>
+ %2 = affine.load %arg1[%i2, %i1] : memref<?x?xf32>
+ %3 = mulf %1, %2 : f32
+ %4 = affine.load %arg2[%i0, %i1] : memref<?x?xf32>
+ %5 = addf %4, %3 : f32
+ affine.store %5, %arg2[%i0, %i1] : memref<?x?xf32>
+ }
+ }
+ }
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0)[s0, s1, s2] -> (d0 * s2 + s2, s0 + s1)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<()[s0, s1, s2] -> ((s0 + s1) ceildiv s2)>
+
+// CHECK: func @tile_with_loop_upper_bounds_in_two_symbols([[ARG0:%arg[0-9]+]]: index{{.*}}){{.*}}
+func @tile_with_loop_upper_bounds_in_two_symbols(%t11 : index, %arg0: memref<?xf32>, %limit: index) {
+ %c0 = constant 0 : index
+ %dim0 = dim %arg0, %c0 : memref<?xf32>
+ affine.for %i0 = 0 to affine_map<()[s0, s1] -> (s0 + s1)> ()[%dim0, %limit] {
+ %v0 = affine.load %arg0[%i0] : memref<?xf32>
+ }
+ // CHECK: affine.for [[ARG1:%arg[0-9]+]] = 0 to [[UBO0]]()[%{{.*}}, %{{.*}}, [[ARG0]]]
+ // CHECK-NEXT: affine.for %[[I:.*]] = [[LBI0]]([[ARG1]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]([[ARG1]])[{{.*}}, {{.*}}, [[ARG0]]]
+ // CHECK-NEXT: affine.load %{{.*}}[%[[I]]]
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI1:#map[0-9]+]] = affine_map<(d0, d1)[s0, s1] -> (d1 * s1 + s1, d0 + s0 + 4)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0, d1)[s0, s1] -> (d1 * s1 + s1, d0 + s0 + 2)>
+// CHECK-DAG: [[LBO0:#map[0-9]+]] = affine_map<() -> (0)>
+// CHECK-DAG: [[UBO1:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 4) ceildiv s1)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 2) ceildiv s1)>
+
+// CHECK: func @tile_with_upper_bounds_in_dimensions_and_symbols([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index, [[ARG3:%arg[0-9]+]]: index{{.*}}){{.*}}
+// CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO0]]({{.*}}){{.*}}[[ARG0]]
+// CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO1]]({{.*}}){{.*}}[[ARG1]]
+// CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG4]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]({{.*}}, [[ARG4]]){{.*}}[[ARG0]]{{.*}}
+// CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG5]]){{.*}}[[ARG1]]{{.*}} to min [[UBI1]]({{.*}}, [[ARG5]]){{.*}}[[ARG1]]{{.*}}
+func @tile_with_upper_bounds_in_dimensions_and_symbols(%t12 : index, %t13 :index, %M: index, %N: index, %K: index) {
+ affine.for %i = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 2)>(%M)[%K] {
+ affine.for %j = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 4)>(%N)[%K] {
+ "test.foo" () : () -> ()
+ }
+ }
+ return
+}
+
+// -----
+
+// CHECK-DAG: [[LBI0:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 * s0)>
+// CHECK-DAG: [[UBI1:#map[0-9]+]] = affine_map<(d0, d1)[s0, s1] -> (d1 * s1 + s1 * 4, d0 + s0 + 4)>
+// CHECK-DAG: [[UBI0:#map[0-9]+]] = affine_map<(d0, d1)[s0, s1] -> (d1 * s1 + s1 * 2, d0 + s0 + 2)>
+// CHECK-DAG: [[LBO0:#map[0-9]+]] = affine_map<() -> (0)>
+// CHECK-DAG: [[UBO1:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 4) ceildiv s1)>
+// CHECK-DAG: [[UBO0:#map[0-9]+]] = affine_map<(d0)[s0, s1] -> ((d0 + s0 + 2) ceildiv s1)>
+
+// CHECK: func @tile_with_upper_bounds_in_dimensions_and_symbols_non_unit_steps
+// CHECK-SAME: ([[ARG0:%arg[0-9]+]]: index, [[ARG1:%arg[0-9]+]]: index, [[ARG2:%arg[0-9]+]]: index, [[ARG3:%arg[0-9]+]]: index{{.*}}){{.*}}
+// CHECK-NEXT: affine.for [[ARG4:%arg[0-9]+]] = 0 to [[UBO0]]({{.*}}){{.*}}[[ARG0]]{{.*}} step 2{{.*}}
+// CHECK-NEXT: affine.for [[ARG5:%arg[0-9]+]] = 0 to [[UBO1]]({{.*}}){{.*}}[[ARG1]]{{.*}} step 4{{.*}}
+// CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG4]]){{.*}}[[ARG0]]{{.*}} to min [[UBI0]]({{.*}}, [[ARG4]]){{.*}}[[ARG0]]{{.*}} step 2{{.*}}
+// CHECK-NEXT: affine.for {{.*}} = [[LBI0]]([[ARG5]]){{.*}}[[ARG1]]{{.*}} to min [[UBI1]]({{.*}}, [[ARG5]]){{.*}}[[ARG1]]{{.*}} step 4{{.*}}
+func @tile_with_upper_bounds_in_dimensions_and_symbols_non_unit_steps(%t12 : index, %t13 :index, %M: index, %N : index, %K: index) {
+ affine.for %i = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 2)>(%M)[%K] step 2 {
+ affine.for %j = 0 to affine_map<(d0)[s0] -> (d0 + s0 + 4)>(%N)[%K] step 4 {
+ "test.foo" () : () -> ()
+ }
+ }
+ return
+}
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 3ac1e7c55235..99424f1c9c06 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,7 @@
# Exclude tests from libMLIR.so
add_mlir_library(MLIRTestTransforms
TestAllReduceLowering.cpp
+ TestAffineLoopParametricTiling.cpp
TestBufferPlacement.cpp
TestExpandTanh.cpp
TestCallGraph.cpp
diff --git a/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp b/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp
new file mode 100644
index 000000000000..5d369e62ae43
--- /dev/null
+++ b/mlir/test/lib/Transforms/TestAffineLoopParametricTiling.cpp
@@ -0,0 +1,90 @@
+//= TestAffineLoopParametricTiling.cpp -- Parametric Affine loop tiling pass =//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a test pass to test parametric tiling of perfectly
+// nested affine for loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Transforms/LoopUtils.h"
+
+using namespace mlir;
+
+#define DEBUG_TYPE "test-affine-parametric-tile"
+
+namespace {
+
+/// Test pass that tiles each maximal perfect 'affine.for' nest in a function,
+/// using the function's leading arguments as SSA tiling parameters.
+struct TestAffineLoopParametricTiling
+    : public PassWrapper<TestAffineLoopParametricTiling, FunctionPass> {
+  void runOnFunction() override;
+};
+} // end anonymous namespace
+
+/// Checks if the function enclosing the loop nest has any arguments passed to
+/// it, which can be used as tiling parameters. Assumes that at least 'n'
+/// arguments are passed, where 'n' is the number of loops in the loop nest.
+///
+/// NOTE(review): this check consists solely of asserts, so it is a no-op in
+/// release (NDEBUG) builds; acceptable for a test pass.
+static void checkIfTilingParametersExist(ArrayRef<AffineForOp> band) {
+  assert(!band.empty() && "no loops in input band");
+  AffineForOp topLoop = band[0];
+
+  // Only validated when the direct parent is a FuncOp; otherwise silently
+  // skipped.
+  if (FuncOp funcOp = dyn_cast<FuncOp>(topLoop.getParentOp()))
+    assert(funcOp.getNumArguments() >= band.size() && "Too few tile sizes");
+}
+
+/// Captures tiling parameters, which are expected to be passed as arguments
+/// to the function enclosing the loop nest. Also checks if the required
+/// parameters are of index type. This approach is temporary for testing
+/// purposes.
+///
+/// Appends the first band.size() block arguments of the enclosing region to
+/// `tilingParameters`.
+static void getTilingParameters(ArrayRef<AffineForOp> band,
+                                SmallVectorImpl<Value> &tilingParameters) {
+  AffineForOp topLoop = band[0];
+  Region *funcOpRegion = topLoop.getParentRegion();
+  unsigned nestDepth = band.size();
+
+  // NOTE(review): take_front(nestDepth) already restricts arg numbers to
+  // [0, nestDepth), so the inner 'if' below is always true — redundant.
+  for (BlockArgument blockArgument :
+       funcOpRegion->getArguments().take_front(nestDepth)) {
+    if (blockArgument.getArgNumber() < nestDepth) {
+      assert(blockArgument.getType().isIndex() &&
+             "expected tiling parameters to be of index type.");
+      tilingParameters.push_back(blockArgument);
+    }
+  }
+}
+
+/// Collects all tileable bands in the function and parametrically tiles each
+/// one, using the function's leading arguments as tile sizes. Signals pass
+/// failure if any band fails to tile.
+void TestAffineLoopParametricTiling::runOnFunction() {
+  // Bands of loops to tile.
+  std::vector<SmallVector<AffineForOp, 6>> bands;
+  getTileableBands(getFunction(), &bands);
+
+  // Tile each band.
+  for (SmallVectorImpl<AffineForOp> &band : bands) {
+    // Capture the tiling parameters from the arguments to the function
+    // enclosing this loop nest.
+    SmallVector<AffineForOp, 6> tiledNest;
+    SmallVector<Value, 6> tilingParameters;
+    // Check if tiling parameters are present (assert-only check).
+    checkIfTilingParametersExist(band);
+
+    // Get function arguments as tiling parameters.
+    getTilingParameters(band, tilingParameters);
+
+    if (failed(
+            tilePerfectlyNestedParametric(band, tilingParameters, &tiledNest)))
+      return signalPassFailure();
+  }
+}
+
+namespace mlir {
+/// Registers the test pass with mlir-opt under -test-affine-parametric-tile.
+void registerTestAffineLoopParametricTilingPass() {
+  PassRegistration<TestAffineLoopParametricTiling>(
+      "test-affine-parametric-tile",
+      "Tile affine loops using SSA values as tile sizes");
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index e46327aa6399..93934d40fe59 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -41,6 +41,7 @@ void registerSimpleParametricTilingPass();
void registerSliceAnalysisTestPass();
void registerSymbolTestPasses();
void registerTestAffineDataCopyPass();
+void registerTestAffineLoopParametricTilingPass();
void registerTestAffineLoopUnswitchingPass();
void registerTestAllReduceLoweringPass();
void registerTestBufferPlacementPreparationPass();
@@ -104,6 +105,7 @@ void registerTestPasses() {
#if MLIR_ROCM_CONVERSIONS_ENABLED
registerTestConvertGPUKernelToHsacoPass();
#endif
+ registerTestAffineLoopParametricTilingPass();
registerTestBufferPlacementPreparationPass();
registerTestDominancePass();
registerTestFunc();
More information about the Mlir-commits
mailing list