[llvm] [LV] Transform to handle exits in the scalar loop (PR #148626)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 02:21:43 PST 2025
================
@@ -3610,6 +3616,147 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
LatchExitingBranch->eraseFromParent();
}
+bool VPlanTransforms::handleUncountableExitsInScalarLoop(VPlan &Plan) {
+ assert(!Plan.hasScalarVFOnly() &&
+ "Cannot transform uncountable exits in scalar loop");
+
+ // We can abandon a vplan entirely if we return false here, so we shouldn't
+ // crash if some earlier assumptions on scalar IR don't hold for the vplan
+ // version of the loop.
+ VPCanonicalIVPHIRecipe *IV = Plan.getVectorLoopRegion()->getCanonicalIV();
+ VPInstruction *IVUpdate = dyn_cast<VPInstruction>(IV->getBackedgeValue());
+ if (!IVUpdate)
+ return false;
+
+ SmallVector<VPRecipeBase *, 2> GEPs;
+ SmallVector<VPRecipeBase *, 8> ConditionRecipes;
+
+ std::optional<VPValue *> Cond =
+ vputils::getRecipesForUncountableExit(Plan, ConditionRecipes, GEPs);
+ if (!Cond)
+ return false;
+
+ // Check GEPs to see if we can link them to the canonical IV.
+ using namespace llvm::VPlanPatternMatch;
+ for (auto *GEP : GEPs)
+ if (!match(GEP,
+ m_GetElementPtr(m_LiveIn(),
+ m_ScalarIVSteps(m_Specific(IV), m_SpecificInt(1),
+ m_Specific(&Plan.getVF())))))
+ return false;
+
+ // Clone the condition recipes into the preheader
+ SmallDenseMap<VPRecipeBase *, VPRecipeBase *, 8> CloneMap;
+ VPBasicBlock *VectorPH = Plan.getVectorPreheader();
+ for (VPRecipeBase *R : reverse(ConditionRecipes)) {
+ VPRecipeBase *Clone = R->clone();
+ VectorPH->appendRecipe(Clone);
+ CloneMap[R] = Clone;
+ }
+
+ // Remap the cloned recipes to use the corresponding operands.
+ for (VPRecipeBase *R : ConditionRecipes) {
+ auto *Clone = CloneMap.at(R);
+ for (unsigned I = 0; I < R->getNumOperands(); ++I)
+ if (VPRecipeBase *OpR =
+ CloneMap.lookup(R->getOperand(I)->getDefiningRecipe()))
+ Clone->setOperand(I, OpR->getVPSingleValue());
+ }
+
+ // Adjust preheader GEPs to match the value they would have for the first
+ // iteration of the vector body.
+ for (auto *GEP : GEPs)
+ CloneMap.at(GEP)->setOperand(1, IV->getStartValue());
+
+ // Split vector preheader to form a new bypass block.
+ VPBasicBlock *NewPH = VectorPH->splitAt(VectorPH->end());
+ VPBasicBlock *ScalarPH = Plan.getScalarPreheader();
+
+ // Create bypass block branch.
+ VPRecipeBase *Uncountable = (*Cond)->getDefiningRecipe();
+ VPRecipeBase *PHUncountable = CloneMap.at(Uncountable);
+ VPBuilder PHBuilder(VectorPH, VectorPH->end());
+ VPValue *PHAnyOf = PHBuilder.createNaryOp(
+ VPInstruction::AnyOf, {PHUncountable->getVPSingleValue()});
+ PHBuilder.createNaryOp(VPInstruction::BranchOnCond, {PHAnyOf},
+ PHUncountable->getDebugLoc());
+ VectorPH->clearSuccessors();
+ NewPH->clearPredecessors();
+ VPBlockUtils::connectBlocks(VectorPH, ScalarPH);
+ VPBlockUtils::connectBlocks(VectorPH, NewPH);
+
+ // Modify plan so that other check blocks (e.g. SCEVs) can be attached to
+ // the correct block.
+ Plan.setEarlyExitPreheader(VectorPH);
+
+ // Fix up the resume phi in scalar preheader -- we might not have reached
+ // the calculated maximum vector tripcount, so just use the next value of IV.
+ VPBasicBlock *MiddleBlock = Plan.getMiddleBlock();
+ VPValue *VecTC = &Plan.getVectorTripCount();
+ for (VPRecipeBase &PHI : ScalarPH->phis()) {
+ VPPhi *ResumePHI = dyn_cast<VPPhi>(&PHI);
+ VPValue *EntryVal = nullptr;
+ for (unsigned I = 0; I < ResumePHI->getNumIncoming(); ++I) {
+ const VPBasicBlock *Block = ResumePHI->getIncomingBlock(I);
+ VPValue *V = ResumePHI->getIncomingValue(I);
+ if (Block == Plan.getEntry()) {
+ EntryVal = ResumePHI->getIncomingValue(I);
+ } else if (Block == MiddleBlock && V == VecTC) {
+ ResumePHI->setOperand(I, IVUpdate);
+ } else {
+ return false;
+ }
+ }
+
+ if (!EntryVal)
+ return false;
+ ResumePHI->addOperand(EntryVal);
+ }
+
+ // Move the IV update if necessary, then update the index operand of the GEP
+ // so that we load the next vector iteration's exit condition data.
+ VPDominatorTree VPDT(Plan);
+ for (auto *GEP : GEPs) {
+ if (!VPDT.properlyDominates(IVUpdate, GEP))
+ IVUpdate->moveBefore(*GEP->getParent(), GEP->getIterator());
+ GEP->setOperand(1, IVUpdate);
+ }
+
+ // Convert loads for the next vector iteration to use a mask so that we
+ // avoid any accesses that the scalar loop would not have performed.
+ for (VPRecipeBase *R : ConditionRecipes) {
+ if (auto *Load = dyn_cast<VPWidenLoadRecipe>(R)) {
+ // Bail out for now if it's already conditional.
----------------
sdesmalen-arm wrote:
I guess this bails out because handling an already-conditional load would also require cloning the condition (mask) of the load itself? It might be worth clarifying that in the comment.
https://github.com/llvm/llvm-project/pull/148626
More information about the llvm-commits mailing list