[llvm] [VPlan] Delay adding canonical IV increment. (PR #82270)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 07:18:25 PST 2024
================
@@ -1820,6 +1776,79 @@ void VPlanTransforms::createInterleaveGroups(
}
}
+void VPlanTransforms::lowerCanonicalIV(
+ VPlan &Plan, bool HasNUW, bool DataAndControlFlowWithoutRuntimeCheck) {
+ auto *CanIV = Plan.getCanonicalIV();
+
+ VPBasicBlock *EB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ auto *Term = EB->getTerminator();
+ VPBuilder Builder(Term);
+ DebugLoc DL = CanIV->getDebugLoc();
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
+ auto *CanonicalIVIncrement =
+ Builder.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()},
+ {HasNUW, false}, DL, "index.next");
+
+ CanIV->addOperand(CanonicalIVIncrement);
+
+ auto FoundLaneMaskPhi = find_if(
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
+
+ if (FoundLaneMaskPhi ==
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis().end()) {
+
+ // Update BranchOnCount VPInstruction in the latch to use increment.
+ // TODO: Should have separate opcodes for separate semantics.
+ Term->setOperand(0, CanonicalIVIncrement);
+ return;
+ }
+
+ // Now introduce a conditional branch to control the loop until the lane mask
+ // is exhausted.
+ auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
+ auto *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+ Builder.setInsertPoint(VecPreheader);
+
+ VPValue *TC = Plan.getTripCount();
+
+ // TODO: Check if dropping the flags is needed if
+ // !DataAndControlFlowWithoutRuntimeCheck.
+ CanonicalIVIncrement->dropPoisonGeneratingFlags();
+ VPValue *TripCount, *IncrementValue;
+ if (!DataAndControlFlowWithoutRuntimeCheck) {
+ // When the loop is guarded by a runtime overflow check for the loop
+ // induction variable increment by VF, we can increment the value before
+ // the get.active.lane.mask and use the unmodified tripcount.
+ IncrementValue = CanonicalIVIncrement;
+ TripCount = TC;
+ } else {
+ // When avoiding a runtime check, the active.lane.mask inside the loop
+ // uses a modified trip count and the induction variable increment is
+ // done after the active.lane.mask intrinsic is called.
+ IncrementValue = CanIV;
+ TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
+ {TC}, DL);
+ }
+ // Create the active lane mask for the next iteration of the loop before the
+ // original terminator.
+ Builder.setInsertPoint(EB);
+ auto *InLoopIncrement =
+ Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+ {IncrementValue}, {false, false}, DL);
+ auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
+ {InLoopIncrement, TripCount}, DL,
+ "active.lane.mask.next");
+ LaneMaskPhi->addOperand(ALM);
+
+ // Replace the original terminator with BranchOnCond. We have to invert the
+ // mask here because a true condition means jumping to the exit block.
+ auto *NotMask = Builder.createNot(ALM, DL);
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
+ Term->eraseFromParent();
----------------
ayalz wrote:
The existing terminator is already a branch on cond; reset its operand?
https://github.com/llvm/llvm-project/pull/82270
More information about the llvm-commits
mailing list