[llvm] [VPlan] Enable vectorization of early-exit loops with unit-stride fault-only-first loads (PR #151300)

Luke Lau via llvm-commits <llvm-commits at lists.llvm.org>
Fri Dec 12 01:53:47 PST 2025


================
@@ -3144,6 +3144,137 @@ void VPlanTransforms::addExplicitVectorLength(
   Plan.setUF(1);
 }
 
+void VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(VPlan &Plan) {
+  using namespace SCEVPatternMatch;
+  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  VPWidenIntrinsicRecipe *LastFFLoad = nullptr;
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+           vp_depth_first_deep(Plan.getVectorLoopRegion())))
+    for (VPRecipeBase &R : *VPBB)
+      if (match(&R, m_Intrinsic<Intrinsic::vp_load_ff>(m_VPValue(), m_VPValue(),
+                                                       m_VPValue()))) {
+        assert(!LastFFLoad && "Only one FFLoad is supported");
+        LastFFLoad = cast<VPWidenIntrinsicRecipe>(&R);
+      }
+
+  // Skip if no FFLoad.
+  if (!LastFFLoad)
+    return;
+
+  // Ensure FFLoad does not read past the remainder in the last iteration.
+  // Set AVL to min(VF, remainder).
+  VPBuilder Builder(Header, Header->getFirstNonPhi());
+  DebugLoc DL = LastFFLoad->getDebugLoc();
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
+  VPValue *Remainder = Builder.createNaryOp(
+      Instruction::Sub, {&Plan.getVectorTripCount(), CanonicalIVPHI}, DL);
+  VPValue *Cmp =
+      Builder.createICmp(CmpInst::ICMP_ULE, &Plan.getVF(), Remainder, DL);
+  VPValue *AVL = Builder.createSelect(Cmp, &Plan.getVF(), Remainder, DL);
+  Type *CanIVTy = CanonicalIVPHI->getScalarType();
+  Type *I32Ty = IntegerType::getInt32Ty(Plan.getContext());
+  AVL = Builder.createScalarZExtOrTrunc(AVL, I32Ty, CanIVTy, DL);
+  LastFFLoad->setOperand(2, AVL);
+
+  // To prevent branch-on-poison, mask the early-exit condition with
+  // active-lane-mask. Expected pattern here is:
+  // Before:
+  //   EMIT vp<%alt.exit.cond> = any-of vp<%cond>
+  //   EMIT vp<%exit.cond> = or vp<%alt.exit.cond>, vp<%main.exit.cond>
+  //   EMIT branch-on-cond vp<%exit.cond>
+  // After:
+  //   EMIT vp<%faulting.lane> = extractvalue vp<%ffload>, 1
+  //   EMIT vp<%alm> = active lane mask 0, vp<%faulting.lane>
+  //   EMIT vp<%and> = logical-and vp<%alm>, vp<%cond>
+  //   EMIT vp<%alt.exit.cond> = any-of vp<%and>
+  //   EMIT vp<%exit.cond> = or vp<%alt.exit.cond>, vp<%main.exit.cond>
+  //   EMIT branch-on-cond vp<%exit.cond>
+  auto *ExitingLatch =
----------------
lukel97 wrote:

I think this works on the assumption that the poison lanes from the vp.load.ff never get shuffled around before reaching the exit condition, e.g. that the loaded vector isn't reversed. But I think this always holds; just making a note.
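
To spell out why the lane correspondence matters, here's roughly the masked exit condition in IR after this transform. This is only a sketch: the element type, value names, and the exact vp.load.ff mangling are invented for illustration, assuming the { vector, i32 } return type this PR uses (vector result plus faulting-lane count):

  %ff   = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %p, <vscale x 4 x i1> splat (i1 true), i32 %avl)
  %vec  = extractvalue { <vscale x 4 x i8>, i32 } %ff, 0
  %n    = extractvalue { <vscale x 4 x i8>, i32 } %ff, 1      ; lanes [%n, VF) of %vec are poison
  %alm  = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 %n)
  ; hypothetical early-exit condition; poison in lanes past %n
  %cond = icmp eq <vscale x 4 x i8> %vec, zeroinitializer
  ; logical-and lowers to a per-lane select, so poison lanes of %cond
  ; are never chosen where %alm is false
  %and  = select <vscale x 4 x i1> %alm, <vscale x 4 x i1> %cond, <vscale x 4 x i1> zeroinitializer
  %exit = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> %and)

The active lane mask covers exactly lanes [0, %n), i.e. the lanes the load actually produced. If anything reordered the lanes of %vec between the load and the compare, lane i of %alm would no longer line up with lane i of %cond and the select could still feed poison into the branch, hence the assumption.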

https://github.com/llvm/llvm-project/pull/151300

