[Mlir-commits] [mlir] 98fa615 - [MLIR] move loopUnrollJamBy*Factor to loop transforms utils
Uday Bondhugula
llvmlistbot at llvm.org
Mon Mar 23 19:40:26 PDT 2020
Author: Uday Bondhugula
Date: 2020-03-24T08:08:57+05:30
New Revision: 98fa615002826e2e67892df0ee6847fbfa182308
URL: https://github.com/llvm/llvm-project/commit/98fa615002826e2e67892df0ee6847fbfa182308
DIFF: https://github.com/llvm/llvm-project/commit/98fa615002826e2e67892df0ee6847fbfa182308.diff
LOG: [MLIR] move loopUnrollJamBy*Factor to loop transforms utils
The declarations for these were already part of transforms utils, but
the definitions were left in affine transforms. Move definitions to loop
transforms utils.
Signed-off-by: Uday Bondhugula <uday at polymagelabs.com>
Differential Revision: https://reviews.llvm.org/D76633
Added:
Modified:
mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
mlir/lib/Transforms/Utils/LoopUtils.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
index aa9d2834ad8d..218a6a2ce23d 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
@@ -101,134 +101,5 @@ LogicalResult LoopUnrollAndJam::runOnAffineForOp(AffineForOp forOp) {
return loopUnrollJamByFactor(forOp, kDefaultUnrollJamFactor);
}
-LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
- uint64_t unrollJamFactor) {
- Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
-
- if (mayBeConstantTripCount.hasValue() &&
- mayBeConstantTripCount.getValue() < unrollJamFactor)
- return loopUnrollJamByFactor(forOp, mayBeConstantTripCount.getValue());
- return loopUnrollJamByFactor(forOp, unrollJamFactor);
-}
-
-/// Unrolls and jams this loop by the specified factor.
-LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
- uint64_t unrollJamFactor) {
- // Gathers all maximal sub-blocks of operations that do not themselves
- // include a for op (a operation could have a descendant for op though
- // in its tree). Ignore the block terminators.
- struct JamBlockGatherer {
- // Store iterators to the first and last op of each sub-block found.
- std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;
-
- // This is a linear time walk.
- void walk(Operation *op) {
- for (auto ®ion : op->getRegions())
- for (auto &block : region)
- walk(block);
- }
- void walk(Block &block) {
- for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
- auto subBlockStart = it;
- while (it != e && !isa<AffineForOp>(&*it))
- ++it;
- if (it != subBlockStart)
- subBlocks.push_back({subBlockStart, std::prev(it)});
- // Process all for insts that appear next.
- while (it != e && isa<AffineForOp>(&*it))
- walk(&*it++);
- }
- }
- };
-
- assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
-
- if (unrollJamFactor == 1)
- return promoteIfSingleIteration(forOp);
-
- if (forOp.getBody()->empty() ||
- forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
- return failure();
-
- // Loops where both lower and upper bounds are multi-result maps won't be
- // unrolled (since the trip can't be expressed as an affine function in
- // general).
- // TODO(mlir-team): this may not be common, but we could support the case
- // where the lower bound is a multi-result map and the ub is a single result
- // one.
- if (forOp.getLowerBoundMap().getNumResults() != 1)
- return failure();
-
- Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
- // If the trip count is lower than the unroll jam factor, no unroll jam.
- if (mayBeConstantTripCount.hasValue() &&
- mayBeConstantTripCount.getValue() < unrollJamFactor)
- return failure();
-
- auto *forInst = forOp.getOperation();
-
- // Gather all sub-blocks to jam upon the loop being unrolled.
- JamBlockGatherer jbg;
- jbg.walk(forInst);
- auto &subBlocks = jbg.subBlocks;
-
- // Generate the cleanup loop if trip count isn't a multiple of
- // unrollJamFactor.
- if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
- // Insert the cleanup loop right after 'forOp'.
- OpBuilder builder(forInst->getBlock(), std::next(Block::iterator(forInst)));
- auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forInst));
- // Adjust the lower bound of the cleanup loop; its upper bound is the same
- // as the original loop's upper bound.
- AffineMap cleanupMap;
- SmallVector<Value, 4> cleanupOperands;
- getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
- &cleanupOperands, builder);
- cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
-
- // Promote the cleanup loop if it has turned into a single iteration loop.
- promoteIfSingleIteration(cleanupAffineForOp);
-
- // Adjust the upper bound of the original loop - it will be the same as the
- // cleanup loop's lower bound. Its lower bound remains unchanged.
- forOp.setUpperBound(cleanupOperands, cleanupMap);
- }
-
- // Scale the step of loop being unroll-jammed by the unroll-jam factor.
- int64_t step = forOp.getStep();
- forOp.setStep(step * unrollJamFactor);
-
- auto forOpIV = forOp.getInductionVar();
- // Unroll and jam (appends unrollJamFactor - 1 additional copies).
- for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
- // Operand map persists across all sub-blocks.
- BlockAndValueMapping operandMapping;
- for (auto &subBlock : subBlocks) {
- // Builder to insert unroll-jammed bodies. Insert right at the end of
- // sub-block.
- OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
-
- // If the induction variable is used, create a remapping to the value for
- // this unrolled instance.
- if (!forOpIV.use_empty()) {
- // iv' = iv + i, i = 1 to unrollJamFactor-1.
- auto d0 = builder.getAffineDimExpr(0);
- auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
- auto ivUnroll =
- builder.create<AffineApplyOp>(forInst->getLoc(), bumpMap, forOpIV);
- operandMapping.map(forOpIV, ivUnroll);
- }
- // Clone the sub-block being unroll-jammed.
- for (auto it = subBlock.first; it != std::next(subBlock.second); ++it) {
- builder.clone(*it, operandMapping);
- }
- }
- }
-
- // Promote the loop body up if this has turned into a single iteration loop.
- promoteIfSingleIteration(forOp);
- return success();
-}
-
static PassRegistration<LoopUnrollAndJam> pass("affine-loop-unroll-jam",
"Unroll and jam loops");
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 4f6f2b748fec..a1c888b35af8 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -486,6 +486,135 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
return success();
}
+LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
+ uint64_t unrollJamFactor) {
+ Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
+
+ if (mayBeConstantTripCount.hasValue() &&
+ mayBeConstantTripCount.getValue() < unrollJamFactor)
+ return loopUnrollJamByFactor(forOp, mayBeConstantTripCount.getValue());
+ return loopUnrollJamByFactor(forOp, unrollJamFactor);
+}
+
+/// Unrolls and jams this loop by the specified factor.
+LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
+ uint64_t unrollJamFactor) {
+ // Gathers all maximal sub-blocks of operations that do not themselves
+ // include a for op (a operation could have a descendant for op though
+ // in its tree). Ignore the block terminators.
+ struct JamBlockGatherer {
+ // Store iterators to the first and last op of each sub-block found.
+ std::vector<std::pair<Block::iterator, Block::iterator>> subBlocks;
+
+ // This is a linear time walk.
+ void walk(Operation *op) {
+ for (auto ®ion : op->getRegions())
+ for (auto &block : region)
+ walk(block);
+ }
+ void walk(Block &block) {
+ for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
+ auto subBlockStart = it;
+ while (it != e && !isa<AffineForOp>(&*it))
+ ++it;
+ if (it != subBlockStart)
+ subBlocks.push_back({subBlockStart, std::prev(it)});
+ // Process all for insts that appear next.
+ while (it != e && isa<AffineForOp>(&*it))
+ walk(&*it++);
+ }
+ }
+ };
+
+ assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
+
+ if (unrollJamFactor == 1)
+ return promoteIfSingleIteration(forOp);
+
+ if (forOp.getBody()->empty() ||
+ forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
+ return failure();
+
+ // Loops where both lower and upper bounds are multi-result maps won't be
+ // unrolled (since the trip can't be expressed as an affine function in
+ // general).
+ // TODO(mlir-team): this may not be common, but we could support the case
+ // where the lower bound is a multi-result map and the ub is a single result
+ // one.
+ if (forOp.getLowerBoundMap().getNumResults() != 1)
+ return failure();
+
+ Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
+ // If the trip count is lower than the unroll jam factor, no unroll jam.
+ if (mayBeConstantTripCount.hasValue() &&
+ mayBeConstantTripCount.getValue() < unrollJamFactor)
+ return failure();
+
+ auto *forInst = forOp.getOperation();
+
+ // Gather all sub-blocks to jam upon the loop being unrolled.
+ JamBlockGatherer jbg;
+ jbg.walk(forInst);
+ auto &subBlocks = jbg.subBlocks;
+
+ // Generate the cleanup loop if trip count isn't a multiple of
+ // unrollJamFactor.
+ if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
+ // Insert the cleanup loop right after 'forOp'.
+ OpBuilder builder(forInst->getBlock(), std::next(Block::iterator(forInst)));
+ auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forInst));
+ // Adjust the lower bound of the cleanup loop; its upper bound is the same
+ // as the original loop's upper bound.
+ AffineMap cleanupMap;
+ SmallVector<Value, 4> cleanupOperands;
+ getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
+ &cleanupOperands, builder);
+ cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
+
+ // Promote the cleanup loop if it has turned into a single iteration loop.
+ promoteIfSingleIteration(cleanupAffineForOp);
+
+ // Adjust the upper bound of the original loop - it will be the same as the
+ // cleanup loop's lower bound. Its lower bound remains unchanged.
+ forOp.setUpperBound(cleanupOperands, cleanupMap);
+ }
+
+ // Scale the step of loop being unroll-jammed by the unroll-jam factor.
+ int64_t step = forOp.getStep();
+ forOp.setStep(step * unrollJamFactor);
+
+ auto forOpIV = forOp.getInductionVar();
+ // Unroll and jam (appends unrollJamFactor - 1 additional copies).
+ for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
+ // Operand map persists across all sub-blocks.
+ BlockAndValueMapping operandMapping;
+ for (auto &subBlock : subBlocks) {
+ // Builder to insert unroll-jammed bodies. Insert right at the end of
+ // sub-block.
+ OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
+
+ // If the induction variable is used, create a remapping to the value for
+ // this unrolled instance.
+ if (!forOpIV.use_empty()) {
+ // iv' = iv + i, i = 1 to unrollJamFactor-1.
+ auto d0 = builder.getAffineDimExpr(0);
+ auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
+ auto ivUnroll =
+ builder.create<AffineApplyOp>(forInst->getLoc(), bumpMap, forOpIV);
+ operandMapping.map(forOpIV, ivUnroll);
+ }
+ // Clone the sub-block being unroll-jammed.
+ for (auto it = subBlock.first; it != std::next(subBlock.second); ++it) {
+ builder.clone(*it, operandMapping);
+ }
+ }
+ }
+
+ // Promote the loop body up if this has turned into a single iteration loop.
+ promoteIfSingleIteration(forOp);
+ return success();
+}
+
/// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
/// nested within 'forOpA' as the only non-terminator operation in its block.
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
More information about the Mlir-commits
mailing list