[libcxx-commits] [libcxx] [libc] [lldb] [clang-tools-extra] [llvm] [mlir] [compiler-rt] [lld] [clang] [MLIR][LLVM] Add Continuous Loop Peeling transform to SCF (PR #71555)
Matthias Springer via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Jan 8 06:16:59 PST 2024
================
@@ -105,6 +106,167 @@ static void specializeForLoopForUnrolling(ForOp op) {
op.erase();
}
+/// Create a new for loop for the remaining iterations (partialIteration)
+/// after a for loop has been peeled. This is followed by correcting the
+/// loop bounds for both loops given the index (splitBound) where the
+/// iteration space is to be split up. Returns failure(), before modifying any IR, when the constant bounds already divide evenly by the step or when the step is 1; returns success() otherwise.
+static LogicalResult splitLoopHelper(RewriterBase &b, scf::ForOp forOp,
+ scf::ForOp &partialIteration,
+ Value &splitBound) {
+ RewriterBase::InsertionGuard guard(b); // Restores b's insertion point when this function returns.
+ auto lbInt = getConstantIntValue(forOp.getLowerBound()); // Each of these three is empty unless the operand is a constant.
+ auto ubInt = getConstantIntValue(forOp.getUpperBound());
+ auto stepInt = getConstantIntValue(forOp.getStep());
+
+ // No specialization necessary if step already divides the constant range (ub - lb) evenly.
+ if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) % *stepInt == 0)
+ return failure();
+ // No specialization necessary if step size is 1.
+ if (stepInt == static_cast<int64_t>(1))
+ return failure();
+
+ // Create ForOp for partial iteration: a clone of forOp placed directly after it, starting at splitBound.
+ b.setInsertionPointAfter(forOp);
+ partialIteration = cast<scf::ForOp>(b.clone(*forOp.getOperation()));
+ partialIteration.getLowerBoundMutable().assign(splitBound);
+ forOp.replaceAllUsesWith(partialIteration->getResults()); // Must precede the next line, otherwise the new init-arg uses of forOp's results would be redirected as well. NOTE(review): not routed through `b`, unlike the bound update below - confirm.
+ partialIteration.getInitArgsMutable().assign(forOp->getResults()); // The partial loop continues from the values produced by forOp.
+
+ // Set new upper loop bound: forOp now stops at splitBound.
+ b.updateRootInPlace(
+ forOp, [&]() { forOp.getUpperBoundMutable().assign(splitBound); });
+
+ return success();
+}
+
+/// Convert single-iteration for loop to if-else block. The if is created before forOp, guarded by a signed `lowerBound < upperBound` compare; forOp's body is inlined into the then block, the else block yields forOp's init values when the loop has results, and forOp is replaced by the if's results, which are returned via the new scf::IfOp.
+static scf::IfOp convertSingleIterFor(RewriterBase &b, scf::ForOp &forOp) {
+ Location loc = forOp->getLoc();
+ IRMapping mapping; // NOTE(review): populated below but never read anywhere in this function - looks like dead code; confirm.
+ mapping.map(forOp.getInductionVar(), forOp.getLowerBound());
+ for (auto [arg, operand] :
+ llvm::zip_equal(forOp.getRegionIterArgs(), forOp.getInitsMutable())) {
+ mapping.map(arg, operand.get());
+ }
+ b.setInsertionPoint(forOp); // NOTE(review): no InsertionGuard here, so b's insertion point is left changed for the caller - confirm this is intended.
+ auto cond =
+ b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
+ forOp.getLowerBound(), forOp.getUpperBound());
+ auto ifOp = b.create<scf::IfOp>(loc, forOp->getResultTypes(), cond, true); // `true`: presumably requests an else region (elseBlock() is used below) - confirm against the scf::IfOp builders.
+ // then branch: replacement values for the loop body's block arguments - the lower bound first, then the init args.
+ SmallVector<Value> bbArgReplacements;
+ bbArgReplacements.push_back(forOp.getLowerBound());
+ llvm::append_range(bbArgReplacements, forOp.getInitArgs());
+
+ b.inlineBlockBefore(forOp.getBody(), ifOp.thenBlock(),
+ ifOp.thenBlock()->begin(), bbArgReplacements);
+ // else branch: forward the init values unchanged.
+ b.setInsertionPointToStart(ifOp.elseBlock());
+ if (!forOp->getResultTypes().empty()) { // NOTE(review): with no results nothing is created in the else block - confirm it still ends up with a terminator.
+ b.create<scf::YieldOp>(loc, forOp.getInits());
+ }
+ b.replaceOp(forOp, ifOp->getResults());
+ return ifOp;
+}
+
+/// Rewrite a for loop with bounds/step that potentially do not divide the
+/// iteration space evenly into a chain of for loops where the step is a
+/// power of 2 and decreases exponentially across subsequent loops. Helps
+/// divide the iteration space across all resulting peeled loops evenly.
+///
+/// Optionally, convert all single iteration for loops to if-else
+/// blocks when convert_single_iter_loops_to_if attribute is set to true or
+/// alternatively with the convert-single-iter-loops-to-if option for the
+/// scf-for-loop-continuous-peeling pass.
+static LogicalResult continuousPeelForLoop(RewriterBase &b, ForOp forOp,
+ ForOp &partialIteration,
+ bool convertSingleIterLoopsToIf) {
+
+ scf::ForOp currentLoop;
+ auto lbInt = getConstantIntValue(forOp.getLowerBound());
+ auto stepInt = getConstantIntValue(forOp.getStep());
+
+ // Step size must be a known positive constant greater than 1.
+ if (!stepInt || stepInt <= static_cast<int64_t>(1))
+ return failure();
+
+ Value initialUb = forOp.getUpperBound();
+ Value initialStep = forOp.getStep();
+ uint64_t loopStep = *stepInt;
+ currentLoop = forOp;
+ AffineExpr sym0, sym1, sym2;
+ bindSymbols(b.getContext(), sym0, sym1, sym2);
+ AffineMap defaultSplitMap =
+ AffineMap::get(0, 3, {sym1 - ((sym1 - sym0) % sym2)});
+ AffineMap powerSplitMap = AffineMap::get(0, 3, {sym1 - (sym1 % sym2)});
+ bool usePowerSplit = (lbInt.has_value()) &&
+ (*lbInt % *stepInt == static_cast<int64_t>(0)) &&
+ (loopStep == llvm::bit_floor(loopStep));
+ AffineMap splitMap = usePowerSplit ? powerSplitMap : defaultSplitMap;
+ SmallVector<scf::ForOp> loops;
+ while (loopStep) {
+ b.setInsertionPoint(currentLoop);
+ auto constStepOp =
+ b.create<arith::ConstantIndexOp>(currentLoop.getLoc(), loopStep);
+ b.updateRootInPlace(currentLoop, [&]() {
+ currentLoop.getStepMutable().assign(constStepOp);
+ });
+ b.setInsertionPoint(currentLoop);
+ Value splitBound = b.createOrFold<affine::AffineApplyOp>(
+ currentLoop.getLoc(), splitMap,
+ ValueRange{currentLoop.getLowerBound(), currentLoop.getUpperBound(),
+ currentLoop.getStep()});
+ LogicalResult status =
+ splitLoopHelper(b, currentLoop, partialIteration, splitBound);
+
+ // Canonicalize min/max affine operations
+ // It uses scf::rewritePeeledMinMaxOp to identify operations to be replaced,
+ // they are then replaced by the current step size.
+ // TODO: Alternative method - update affine map to reflect the loop step
+ // Example: min(ub - iv, 8) -> min(ub - iv, 4)
+ currentLoop.walk([&](Operation *affineOp) {
+ if (isa<AffineMinOp, AffineMaxOp>(affineOp)) {
+ FailureOr<AffineApplyOp> result = scf::rewritePeeledMinMaxOp(
+ b, affineOp, currentLoop.getInductionVar(), initialUb, initialStep,
+ /*insideLoop=*/true);
+ // correct the step of the newly created affine op
+ if (!failed(result))
----------------
matthias-springer wrote:
`if (succeeded(result))`
https://github.com/llvm/llvm-project/pull/71555
More information about the libcxx-commits
mailing list