[llvm] [VPlan] Delay adding canonical IV increment and exit branches. (PR #82270)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 9 14:16:02 PDT 2024
================
@@ -1572,3 +1529,71 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
}
}
+
+void VPlanTransforms::finalizePlan(VPlan &Plan, bool HasNUW,
+ bool DataAndControlFlowWithoutRuntimeCheck) {
+ auto *CanIV = Plan.getCanonicalIV();
+
+ VPBasicBlock *EB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ VPBuilder Builder(EB);
+ DebugLoc DL = CanIV->getDebugLoc();
+ // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
+ auto *CanonicalIVIncrement =
+ Builder.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()},
+ {HasNUW, false}, DL, "index.next");
+
+ CanIV->addOperand(CanonicalIVIncrement);
+
+ auto FoundLaneMaskPhi = find_if(
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+ [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
+
+ if (FoundLaneMaskPhi ==
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis().end()) {
+ // Add the BranchOnCount VPInstruction to the latch.
+ Builder.createNaryOp(VPInstruction::BranchOnCount,
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()},
+ DL);
+ return;
+ }
+ auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
+ auto *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+ Builder.setInsertPoint(VecPreheader);
+
+ VPValue *TC = Plan.getTripCount();
+
+ // TODO: Check if dropping the flags is needed if
+ // !DataAndControlFlowWithoutRuntimeCheck.
+ CanonicalIVIncrement->dropPoisonGeneratingFlags();
+ VPValue *TripCount, *IncrementValue;
+ if (!DataAndControlFlowWithoutRuntimeCheck) {
+ // When the loop is guarded by a runtime overflow check for the loop
+ // induction variable increment by VF, we can increment the value before
+ // the get.active.lane mask and use the unmodified tripcount.
+ IncrementValue = CanonicalIVIncrement;
+ TripCount = TC;
+ } else {
+ // When avoiding a runtime check, the active.lane.mask inside the loop
+ // uses a modified trip count and the induction variable increment is
+ // done after the active.lane.mask intrinsic is called.
+ IncrementValue = CanIV;
+ TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
+ {TC}, DL);
+ }
+ // Create the active lane mask for the next iteration of the loop before the
+ // original terminator.
+ Builder.setInsertPoint(EB);
+ auto *InLoopIncrement =
+ Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+ {IncrementValue}, {false, false}, DL);
+ auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
+ {InLoopIncrement, TripCount}, DL,
+ "active.lane.mask.next");
+ LaneMaskPhi->addOperand(ALM);
+
+ // Replace the original terminator with BranchOnCond. We have to invert the
----------------
fhahn wrote:
Updated to initially still introduce the branch early. At the moment it still uses the same opcode, but would probably need to introduce a separate one for the different semantics (or define them conditionally on whether lowering has been finalized)
https://github.com/llvm/llvm-project/pull/82270
More information about the llvm-commits
mailing list