[llvm] [VPlan] Delay adding canonical IV increment. (PR #82270)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 15:00:26 PST 2025
================
@@ -2043,6 +1986,98 @@ void VPlanTransforms::createInterleaveGroups(
}
}
+void VPlanTransforms::convertCanonicalIV(
+ VPlan &Plan, bool HasNUW, bool DataAndControlFlowWithoutRuntimeCheck) {
+ auto *CanIV = Plan.getCanonicalIV();
+
+ VPBasicBlock *Latch = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ auto *LatchTerm = Latch->getTerminator();
+ VPBuilder Builder(LatchTerm);
+ DebugLoc DL = CanIV->getDebugLoc();
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
+ auto *CanonicalIVIncrement =
+ Builder.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()},
+ {HasNUW, false}, DL, "index.next");
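+  // The canonical IV phi is created with only a start value; adding the
+  // increment as its second operand makes it the phi's backedge value.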
+ CanIV->addOperand(CanonicalIVIncrement);
+
+ auto FoundLaneMaskPhi = find_if(
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
+
+ if (FoundLaneMaskPhi ==
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis().end()) {
+
+ // Update BranchOnCount VPInstruction in the latch to use increment.
+ // TODO: Should have separate opcodes for separate semantics.
+ LatchTerm->setOperand(0, CanonicalIVIncrement);
+ return;
+ }
+
+ // Now adjust the start value of the active-lane-mask depending on
+ // DataAndControlFlowWithoutRuntimeCheck, introduce its increment and a
+ // conditional branch to control the loop until the lane mask is exhausted.
+ // Concretely, we add the following recipes:
+ //
+ // vector.ph:
+ // %TripCount = calculate-trip-count-minus-VF (original TC)
+  //   [if DataAndControlFlowWithoutRuntimeCheck]
+ // %EntryInc = canonical-iv-increment-for-part %StartV
+ // %EntryALM = active-lane-mask %EntryInc, %TripCount (replaces the
+ // existing start value for the existing active-lane-mask-phi)
+ //
+ // vector.body:
+ // ...
+ // (existing) %P = active-lane-mask-phi [ %EntryALM, %vector.ph ],
+ // (added increment) [ %ALM, %vector.body ]
+ // ...
+ // %InLoopInc = canonical-iv-increment-for-part %IncrementValue
+ // %ALM = active-lane-mask %InLoopInc, TripCount
+ // %Negated = Not %ALM
+ // branch-on-cond %Negated (replaces existing BranchOnCount)
+
+ auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
+ auto *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+ Builder.setInsertPoint(VecPreheader);
+
+ VPValue *TC = Plan.getTripCount();
+
+ // TODO: Check if dropping the flags is needed if
+ // !DataAndControlFlowWithoutRuntimeCheck.
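+  // When tail folding without a runtime overflow check, the increment may
+  // step past the trip count (and potentially wrap the IV type), so any
+  // no-wrap flags set on it above are dropped.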
+ CanonicalIVIncrement->dropPoisonGeneratingFlags();
+ VPValue *TripCount, *IncrementValue;
+ if (!DataAndControlFlowWithoutRuntimeCheck) {
+ // When the loop is guarded by a runtime overflow check for the loop
+ // induction variable increment by VF, we can increment the value before
+    // the get.active.lane.mask intrinsic and use the unmodified trip count.
+ IncrementValue = CanonicalIVIncrement;
+ TripCount = TC;
+ } else {
+ // When avoiding a runtime check, the active.lane.mask inside the loop
+ // uses a modified trip count and the induction variable increment is
+ // done after the active.lane.mask intrinsic is called.
+ IncrementValue = CanIV;
+ TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
+ {TC}, DL);
+ }
+ // Create the active lane mask for the next iteration of the loop before the
+ // original terminator.
+ Builder.setInsertPoint(Latch);
+ auto *InLoopIncrement =
+ Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+ {IncrementValue}, {false, false}, DL);
+ auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
+ {InLoopIncrement, TripCount}, DL,
+ "active.lane.mask.next");
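+  // Add the next-iteration mask as the backedge value of the existing
+  // active-lane-mask phi.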
+ LaneMaskPhi->addOperand(ALM);
+
+ // Replace the original terminator with BranchOnCond. We have to invert the
+ // mask here because a true condition means jumping to the exit block.
+ auto *NotMask = Builder.createNot(ALM, DL);
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
----------------
ayalz wrote:
BranchOnCond operates on a single condition bit, so NotMask (or only the last part thereof?) should effectively be reduced using an AnyOf reduction. Its execute/generate, however, seems to look only at the first lane, rather than the last?
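For context, here is a minimal sketch of what this latch lowers to; this is hypothetical IR for a fixed VF of 4 and UF of 1, with illustrative value and block names, not output taken from the patch:

  %alm.next = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index.part.next, i64 %tc)
  %not.alm = xor <4 x i1> %alm.next, <i1 true, i1 true, i1 true, i1 true>
  %cond = extractelement <4 x i1> %not.alm, i64 0
  br i1 %cond, label %middle.block, label %vector.body

Since llvm.get.active.lane.mask(%base, %n) sets lane i exactly when %base + i < %n (unsigned), the mask is always a prefix: lane 0 is active whenever any lane is. Testing the first lane of the negated mask therefore asks "does the next mask have no active lanes?", whereas an AnyOf reduction of NotMask would already be true for a partial final mask and exit one iteration early.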
https://github.com/llvm/llvm-project/pull/82270