[llvm] [VPlan] Enable vectorization of early-exit loops with unit-stride fault-only-first loads (PR #151300)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 18:43:08 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Shih-Po Hung (arcbbb)
Changes:
Following #152422, this patch enables auto-vectorization of early-exit loops containing a single potentially faulting, unit-stride load by using the vp.load.ff intrinsic introduced in #128593.
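For context, here is a minimal sketch (not taken from the patch or its test) of the loop shape this targets: a first-match search whose uncountable early exit means the compiler cannot prove that loads beyond the match are dereferenceable, so a plain wide load could fault.

```c++
// Illustrative example only. The a[i] load is unit-stride but potentially
// faulting when vectorized: memory is only guaranteed valid up to the first
// match, so reading a full VF-wide chunk could trap. A fault-only-first load
// stops at the first non-dereferenceable lane and reports how many lanes
// were actually read (VL) instead of trapping.
int find_first(const int *a, int n, int key) {
  for (int i = 0; i < n; ++i)
    if (a[i] == key) // unit-stride load feeding the uncountable early exit
      return i;
  return -1;
}
```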
Key changes:
- Add VPWidenFFLoadRecipe that produces two results: the loaded vector and VL (the number of lanes actually loaded).
- Use VL to ensure correctness (see the IR sketch after this list):
  - Step the induction variable by VL (variable-length stepping).
  - Cap AVL to min(VF, remainder) to avoid over-reads in the last iteration.
  - Compute the early-exit condition from VL by replacing AnyOf with vp.reduce.or to avoid branch-on-poison.
- Introduce two transforms:
  - adjustFFLoadEarlyExitForPoisonSafety: rewrites the exit condition (AnyOf → vp.reduce.or over the first VL lanes) and sets AVL = min(VF, remainder).
  - convertFFLoadEarlyExitToVLStepping: after region dissolution, converts early-exit loops to step by VL.
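Roughly, the vector loop body then looks like the following sketch (illustrative value names and an assumed `<vscale x 4 x i32>` data type; this is not the exact IR checked in the included find.ll test):

```llvm
vector.body:
  %iv = phi i64 [ 0, %vector.ph ], [ %iv.next, %vector.body ]
  ; AVL = min(VF, remainder) so the FF load never reads past the trip count.
  %remainder = sub i64 %n.vec, %iv
  %vf.le.rem = icmp ule i64 %vf, %remainder
  %avl = select i1 %vf.le.rem, i64 %vf, i64 %remainder
  %avl.i32 = trunc i64 %avl to i32
  %gep = getelementptr inbounds i32, ptr %a, i64 %iv
  ; The FF load returns the data and VL, the number of lanes actually loaded.
  %ff = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(
            ptr align 4 %gep, <vscale x 4 x i1> splat (i1 true), i32 %avl.i32)
  %data = extractvalue { <vscale x 4 x i32>, i32 } %ff, 0
  %vl = extractvalue { <vscale x 4 x i32>, i32 } %ff, 1
  %match = icmp eq <vscale x 4 x i32> %data, %key.splat
  ; Lanes >= VL may be poison, so the exit condition reduces only the first
  ; VL lanes with vp.reduce.or instead of a plain AnyOf.
  %early.exit = call i1 @llvm.vp.reduce.or.nxv4i1(
            i1 false, <vscale x 4 x i1> %match,
            <vscale x 4 x i1> splat (i1 true), i32 %vl)
  ; Step the induction variable by VL (variable-length stepping).
  %vl.zext = zext i32 %vl to i64
  %iv.next = add nuw i64 %iv, %vl.zext
  ...
```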
Limitations:
- Supports a single potentially faulting load with unit stride.
- Interleave count (IC) must be 1.
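The new path is off by default behind the hidden flag added in this patch. A hypothetical way to try it (the `-enable-early-exit-vectorization` flag name comes from the existing option in LoopVectorize.cpp; the rest of the command is illustrative):

```
opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \
    -enable-early-exit-vectorization -enable-early-exit-with-ffload \
    -S input.ll
```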
---
Patch is 34.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151300.diff
9 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+41-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+45)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+3-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+43)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+96)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+11)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+2-2)
- (added) llvm/test/Transforms/LoopVectorize/RISCV/find.ll (+236)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1d3cffa2b61bf..e28d4c45d4ab8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -393,6 +393,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<bool>
+ EnableEarlyExitWithFFLoads("enable-early-exit-with-ffload", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable vectorization of early-exit "
+ "loops with fault-only-first loads."));
+
static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
@@ -3507,6 +3513,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
+ if (!Legal->getPotentiallyFaultingLoads().empty() && UserIC > 1) {
+ reportVectorizationFailure("Auto-vectorization of loops with potentially "
+ "faulting loads is not supported when the "
+ "interleave count is more than 1",
+ "CantInterleaveLoopWithPotentiallyFaultingLoads",
+ ORE, TheLoop);
+ return FixedScalableVFPair::getNone();
+ }
+
ScalarEvolution *SE = PSE.getSE();
ElementCount TC = getSmallConstantTripCount(SE, TheLoop);
unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
@@ -4076,6 +4091,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPReductionPHISC:
case VPDef::VPInterleaveEVLSC:
case VPDef::VPInterleaveSC:
+ case VPDef::VPWidenFFLoadSC:
case VPDef::VPWidenLoadEVLSC:
case VPDef::VPWidenLoadSC:
case VPDef::VPWidenStoreEVLSC:
@@ -4550,6 +4566,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
if (!Legal->isSafeForAnyVectorWidth())
return 1;
+ // No interleaving for potentially faulting loads.
+ if (!Legal->getPotentiallyFaultingLoads().empty())
+ return 1;
+
// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
@@ -7216,6 +7236,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
+
+ VPlanTransforms::convertFFLoadEarlyExitToVLStepping(BestVPlan);
+
// Canonicalize EVL loops after regions are dissolved.
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
@@ -7598,6 +7621,10 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Builder.insert(VectorPtr);
Ptr = VectorPtr;
}
+ if (Legal->getPotentiallyFaultingLoads().contains(I))
+ return new VPWidenFFLoadRecipe(*cast<LoadInst>(I), Ptr, &Plan.getVF(), Mask,
+ VPIRMetadata(*I, LVer), I->getDebugLoc());
+
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
VPIRMetadata(*Load, LVer), I->getDebugLoc());
@@ -8632,6 +8659,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
if (Recipe->getNumDefinedValues() == 1) {
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
Old2New[SingleDef] = Recipe->getVPSingleValue();
+ } else if (isa<VPWidenFFLoadRecipe>(Recipe)) {
+ VPValue *Data = Recipe->getVPValue(0);
+ SingleDef->replaceAllUsesWith(Data);
+ Old2New[SingleDef] = Data;
} else {
assert(Recipe->getNumDefinedValues() == 0 &&
"Unexpected multidef recipe");
@@ -8679,6 +8710,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
+ VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(*Plan);
+
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -9869,7 +9902,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (!LVL.getPotentiallyFaultingLoads().empty()) {
+ if (EnableEarlyExitWithFFLoads) {
+ if (LVL.getPotentiallyFaultingLoads().size() > 1) {
+ reportVectorizationFailure("Auto-vectorization of loops with more than 1 "
+ "potentially faulting load is not enabled",
+ "MoreThanOnePotentiallyFaultingLoad", ORE, L);
+ return false;
+ }
+ } else if (!LVL.getPotentiallyFaultingLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting load is not supported",
"PotentiallyFaultingLoadsNotSupported", ORE, L);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f79855f7e2c5f..6e28c95ca601a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -563,6 +563,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPInterleaveEVLSC:
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPIRInstructionSC:
+ case VPRecipeBase::VPWidenFFLoadSC:
case VPRecipeBase::VPWidenLoadEVLSC:
case VPRecipeBase::VPWidenLoadSC:
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -2811,6 +2812,13 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
+ VPReductionEVLRecipe(RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp,
+ VPValue *VecOp, VPValue &EVL, VPValue *CondOp,
+ bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
+ : VPReductionRecipe(VPDef::VPReductionEVLSC, RdxKind, FMFs, nullptr,
+ ArrayRef<VPValue *>({ChainOp, VecOp, &EVL}), CondOp,
+ IsOrdered, DL) {}
+
~VPReductionEVLRecipe() override = default;
VPReductionEVLRecipe *clone() override {
@@ -3159,6 +3167,7 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenFFLoadSC ||
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
}
@@ -3240,6 +3249,42 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
}
};
+/// A recipe for widening loads using fault-only-first intrinsics.
+/// Produces two results: (1) the loaded data, and (2) the index of the first
+/// non-dereferenceable lane, or VF if all lanes are successfully read.
+struct VPWidenFFLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
+ VPWidenFFLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *VF, VPValue *Mask,
+ const VPIRMetadata &Metadata, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenFFLoadSC, Load, {Addr, VF},
+ /*Consecutive*/ true, /*Reverse*/ false, Metadata,
+ DL),
+ VPValue(this, &Load) {
+ new VPValue(nullptr, this); // Index of the first lane that faults.
+ setMask(Mask);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenFFLoadSC);
+
+ /// Return the VF operand.
+ VPValue *getVF() const { return getOperand(1); }
+ void setVF(VPValue *V) { setOperand(1, V); }
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getVF() || Op == getAddr();
+ }
+};
+
/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 46ab7712e2671..684dbd25597e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -188,8 +188,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
}
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
- assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
- "Store recipes should not define any values");
+ assert(
+ (isa<VPWidenLoadRecipe, VPWidenFFLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
+ "Store recipes should not define any values");
return cast<LoadInst>(&R->getIngredient())->getType();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8e9c3db50319f..3da8613a1d3cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -73,6 +73,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPReductionPHISC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
+ case VPWidenFFLoadSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -107,6 +108,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
+ case VPWidenFFLoadSC:
return true;
case VPReplicateSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
@@ -3409,6 +3411,47 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPWidenFFLoadRecipe::execute(VPTransformState &State) {
+ Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+ auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+
+ Value *VL = State.get(getVF(), VPLane(0));
+ Type *I32Ty = Builder.getInt32Ty();
+ VL = Builder.CreateZExtOrTrunc(VL, I32Ty);
+ Value *Addr = State.get(getAddr(), true);
+ Value *Mask = nullptr;
+ if (VPValue *VPMask = getMask())
+ Mask = State.get(VPMask);
+ else
+ Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
+ CallInst *NewLI =
+ Builder.CreateIntrinsic(Intrinsic::vp_load_ff, {DataTy, Addr->getType()},
+ {Addr, Mask, VL}, nullptr, "vp.op.load.ff");
+ NewLI->addParamAttr(
+ 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
+ applyMetadata(*NewLI);
+ Value *V = cast<Instruction>(Builder.CreateExtractValue(NewLI, 0));
+ Value *NewVL = Builder.CreateExtractValue(NewLI, 1);
+ State.set(getVPValue(0), V);
+ State.set(getVPValue(1), NewVL, /*NeedsScalar=*/true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenFFLoadRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << ", ";
+ getVPValue(1)->printAsOperand(O, SlotTracker);
+ O << " = vp.load.ff ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
/// Use all-true mask for reverse rather than actual mask, as it avoids a
/// dependence w/o affecting the result.
static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1f6b85270607e..7e78cb6ed02ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2760,6 +2760,102 @@ void VPlanTransforms::addExplicitVectorLength(
Plan.setUF(1);
}
+void VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(VPlan &Plan) {
+ VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPWidenFFLoadRecipe *LastFFLoad = nullptr;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getVectorLoopRegion())))
+ for (VPRecipeBase &R : *VPBB)
+ if (auto *Load = dyn_cast<VPWidenFFLoadRecipe>(&R)) {
+ assert(!LastFFLoad && "Only one FFLoad is supported");
+ LastFFLoad = Load;
+ }
+
+ // Skip if no FFLoad.
+ if (!LastFFLoad)
+ return;
+
+ // Ensure FFLoad does not read past the remainder in the last iteration.
+ // Set AVL to min(VF, remainder).
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPValue *Remainder = Builder.createNaryOp(
+ Instruction::Sub, {&Plan.getVectorTripCount(), Plan.getCanonicalIV()});
+ VPValue *Cmp =
+ Builder.createICmp(CmpInst::ICMP_ULE, &Plan.getVF(), Remainder);
+ VPValue *AVL = Builder.createSelect(Cmp, &Plan.getVF(), Remainder);
+ LastFFLoad->setVF(AVL);
+
+ // To prevent branch-on-poison, rewrite the early-exit condition to
+ // VPReductionEVLRecipe. Expected pattern here is:
+ // EMIT vp<%alt.exit.cond> = AnyOf
+ // EMIT vp<%exit.cond> = or vp<%alt.exit.cond>, vp<%main.exit.cond>
+ // EMIT branch-on-cond vp<%exit.cond>
+ auto *ExitingLatch = cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getExiting());
+ auto *LatchExitingBr = cast<VPInstruction>(ExitingLatch->getTerminator());
+
+ VPValue *VPAnyOf = nullptr;
+ VPValue *VecOp = nullptr;
+ [[maybe_unused]] bool MatchedExit =
+ match(LatchExitingBr,
+ m_BranchOnCond(m_BinaryOr(m_VPValue(VPAnyOf), m_VPValue()))) &&
+ match(VPAnyOf, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(VecOp)));
+ assert(MatchedExit && "unexpected exiting sequence in early exit loop");
+
+ VPValue *OpVPEVLI32 = LastFFLoad->getVPValue(1);
+ VPValue *Mask = LastFFLoad->getMask();
+ FastMathFlags FMF;
+ auto *I1Ty = Type::getInt1Ty(Plan.getContext());
+ VPValue *VPZero = Plan.getOrAddLiveIn(ConstantInt::get(I1Ty, 0));
+ DebugLoc DL = VPAnyOf->getDefiningRecipe()->getDebugLoc();
+ auto *NewAnyOf =
+ new VPReductionEVLRecipe(RecurKind::Or, FMF, VPZero, VecOp, *OpVPEVLI32,
+ Mask, /*IsOrdered*/ false, DL);
+ NewAnyOf->insertBefore(VPAnyOf->getDefiningRecipe());
+ VPAnyOf->replaceAllUsesWith(NewAnyOf);
+
+ // Using FirstActiveLane in the early-exit block is safe: the exit
+ // condition guarantees that at least one valid lane precedes any
+ // poisoned lanes.
+}
+
+void VPlanTransforms::convertFFLoadEarlyExitToVLStepping(VPlan &Plan) {
+ // Find loop header by locating VPWidenFFLoadRecipe.
+ VPWidenFFLoadRecipe *LastFFLoad = nullptr;
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry())))
+ for (VPRecipeBase &R : *VPBB)
+ if (auto *Load = dyn_cast<VPWidenFFLoadRecipe>(&R)) {
+ assert(!LastFFLoad && "Only one FFLoad is supported");
+ LastFFLoad = Load;
+ }
+
+ // Skip if no FFLoad.
+ if (!LastFFLoad)
+ return;
+
+ VPBasicBlock *HeaderVPBB = LastFFLoad->getParent();
+ // Replace IVStep (VFxUF) with returned VL from FFLoad.
+ auto *CanonicalIV = cast<VPPhi>(&*HeaderVPBB->begin());
+ VPValue *Backedge = CanonicalIV->getIncomingValue(1);
+ assert(match(Backedge, m_c_Add(m_Specific(CanonicalIV),
+ m_Specific(&Plan.getVFxUF()))) &&
+ "Unexpected canonical iv");
+ VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe();
+ VPValue *OpVPEVLI32 = LastFFLoad->getVPValue(1);
+ VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+ Builder.setInsertPoint(CanonicalIVIncrement);
+ auto *TC = Plan.getTripCount();
+ Type *CanIVTy = TC->isLiveIn()
+ ? TC->getLiveInIRValue()->getType()
+ : cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
+ auto *I32Ty = Type::getInt32Ty(Plan.getContext());
+ VPValue *OpVPEVL = Builder.createScalarZExtOrTrunc(
+ OpVPEVLI32, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
+
+ CanonicalIVIncrement->setOperand(1, OpVPEVL);
+}
+
void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
// Find EVL loop entries by locating VPEVLBasedIVPHIRecipe.
// There should be only one EVL PHI in the entire plan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..bc5ce3bc43e76 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -269,6 +269,17 @@ struct VPlanTransforms {
/// (branch-on-cond eq AVLNext, 0)
static void canonicalizeEVLLoops(VPlan &Plan);
+ /// Applies to early-exit loops that use FFLoad. FFLoad may yield fewer active
+ /// lanes than VF. To prevent branch-on-poison and over-reads past the vector
+ /// trip count, use the returned VL for both stepping and exit computation.
+ /// Implemented by:
+ /// - adjustFFLoadEarlyExitForPoisonSafety: replace AnyOf with vp.reduce.or over
+ /// the first VL lanes; set AVL = min(VF, remainder).
+ /// - convertFFLoadEarlyExitToVLStepping: after region dissolution, convert
+ /// early-exit loops to variable-length stepping.
+ static void adjustFFLoadEarlyExitForPoisonSafety(VPlan &Plan);
+ static void convertFFLoadEarlyExitToVLStepping(VPlan &Plan);
+
/// Lower abstract recipes to concrete ones, that can be codegen'd.
static void convertToConcreteRecipes(VPlan &Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0678bc90ef4b5..b2bc430a09686 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -40,6 +40,7 @@ class VPUser;
class VPRecipeBase;
class VPInterleaveBase;
class VPPhiAccessors;
+class VPWidenFFLoadRecipe;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -51,6 +52,7 @@ class LLVM_ABI_FOR_TEST VPValue {
friend class VPInterleaveBase;
friend class VPlan;
friend class VPExpressionRecipe;
+ friend class VPWidenFFLoadRecipe;
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -351,6 +353,7 @@ class VPDef {
VPWidenCastSC,
VPWidenGEPSC,
VPWidenIntrinsicSC,
+ VPWidenFFLoadSC,
VPWidenLoadEVLSC,
VPWidenLoadSC,
VPWidenStoreEVLSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 92caa0b4e51d5..70e6e0d006eb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -166,8 +166,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
}
return VerifyEVLUse(*R, 2);
})
- .Case<VPWidenLoadEVLRecipe, VPVectorEndPointerRecipe,
- VPInterleaveEVLRecipe>(
+ .Case<VPWidenLoadEVLRecipe, VPWidenFFLoadRecipe,
+ VPVectorEndPointerRecipe, VPInterleaveEVLRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPInstructionWithType>(
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/find.ll b/llvm/test/Transforms/LoopVectorize/RISCV/find.ll
new file mode 100644
index 0000000000000..f734bd5f53c82
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/find.ll
@@ -0...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/151300
More information about the llvm-commits mailing list