[llvm] [VPlan] Delay adding canonical IV increment and exit branches. (PR #82270)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 11:23:24 PDT 2024
================
@@ -1572,3 +1529,71 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
}
}
+
+void VPlanTransforms::finalizePlan(VPlan &Plan, bool HasNUW,
+ bool DataAndControlFlowWithoutRuntimeCheck) {
+ auto *CanIV = Plan.getCanonicalIV();
+
+ VPBasicBlock *EB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ VPBuilder Builder(EB);
+ DebugLoc DL = CanIV->getDebugLoc();
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
+ auto *CanonicalIVIncrement =
+ Builder.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()},
+ {HasNUW, false}, DL, "index.next");
+
+ CanIV->addOperand(CanonicalIVIncrement);
+
+ auto FoundLaneMaskPhi = find_if(
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
+
+ if (FoundLaneMaskPhi ==
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis().end()) {
+ // Add the BranchOnCount VPInstruction to the latch.
+ Builder.createNaryOp(VPInstruction::BranchOnCount,
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()},
+ DL);
+ return;
+ }
+ auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
+ auto *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+ Builder.setInsertPoint(VecPreheader);
+
+ VPValue *TC = Plan.getTripCount();
+
+ // TODO: Check if dropping the flags is needed if
+ // !DataAndControlFlowWithoutRuntimeCheck.
+ CanonicalIVIncrement->dropPoisonGeneratingFlags();
+ VPValue *TripCount, *IncrementValue;
+ if (!DataAndControlFlowWithoutRuntimeCheck) {
+ // When the loop is guarded by a runtime overflow check for the loop
+ // induction variable increment by VF, we can increment the value before
+ // the get.active.lane mask and use the unmodified tripcount.
+ IncrementValue = CanonicalIVIncrement;
+ TripCount = TC;
+ } else {
+ // When avoiding a runtime check, the active.lane.mask inside the loop
+ // uses a modified trip count and the induction variable increment is
+ // done after the active.lane.mask intrinsic is called.
+ IncrementValue = CanIV;
+ TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
+ {TC}, DL);
+ }
+ // Create the active lane mask for the next iteration of the loop before the
+ // original terminator.
+ Builder.setInsertPoint(EB);
+ auto *InLoopIncrement =
+ Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+ {IncrementValue}, {false, false}, DL);
+ auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
+ {InLoopIncrement, TripCount}, DL,
+ "active.lane.mask.next");
+ LaneMaskPhi->addOperand(ALM);
+
+ // Replace the original terminator with BranchOnCond. We have to invert the
----------------
ayalz wrote:
This now introduces the terminator rather than replacing it.
Perhaps a branch-on-count terminator recipe should be introduced along with the abstract canonical IV (conceptually, it could compare the pre-bumped IV against TC-step), delaying only the introduction of the canonical IV's increment between them until later? The canonical IV would still remain abstract until this increment is added, but the VPlan would continue to be "valid" without updating verify().
https://github.com/llvm/llvm-project/pull/82270
More information about the llvm-commits
mailing list