[llvm] [VPlan] Enable vectorization of early-exit loops with unit-stride fault-only-first loads (PR #151300)
Shih-Po Hung via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 18:42:24 PDT 2025
https://github.com/arcbbb updated https://github.com/llvm/llvm-project/pull/151300
From 1ab67aecb908134fdade140b91beeefd9ba17264 Mon Sep 17 00:00:00 2001
From: ShihPo Hung <shihpo.hung at sifive.com>
Date: Fri, 25 Jul 2025 16:24:16 -0700
Subject: [PATCH] Support WidenFFLoad in early-exit loop
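Enable vectorization of single-uncountable-exit loops whose only
potentially faulting load is unit-stride, by widening that load with the
llvm.vp.load.ff (fault-only-first) intrinsic and using the VL it returns
both to step the canonical IV and to bound the early-exit reduction. The
feature is guarded by the new -enable-early-exit-with-ffload flag,
interleaving is disabled for such loops, and only a single potentially
faulting load is handled for now.

Roughly, the vectorized loop body for a find-style early-exit loop ends up
looking as follows (an illustrative sketch distilled from the new RISC-V
test; the register names are made up):

  ; AVL is clamped to min(VF, remainder) so the load never reads past the
  ; vector trip count.
  %ff   = call { <vscale x 4 x i32>, i32 }
            @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 %addr,
                                        <vscale x 4 x i1> splat (i1 true),
                                        i32 %avl)
  %data = extractvalue { <vscale x 4 x i32>, i32 } %ff, 0
  %vl   = extractvalue { <vscale x 4 x i32>, i32 } %ff, 1
  %cmp  = icmp eq <vscale x 4 x i32> %data, %splat
  ; The early-exit test only reduces over the first %vl lanes, so poisoned
  ; tail lanes never feed a branch.
  %any  = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false,
                                            <vscale x 4 x i1> %cmp,
                                            <vscale x 4 x i1> splat (i1 true),
                                            i32 %vl)
  ; The canonical IV is advanced by the returned VL instead of VFxUF.
  %vl.zext    = zext i32 %vl to i64
  %index.next = add nuw i64 %index, %vl.zext

This is implemented by a new VPWidenFFLoadRecipe plus two transforms:
adjustFFLoadEarlyExitForPoisonSafety clamps the AVL and rewrites the AnyOf
exit condition into a vp.reduce.or over the first VL lanes, and
convertFFLoadEarlyExitToVLStepping (run after regions are dissolved)
switches the canonical IV to variable-length stepping.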
---
.../Transforms/Vectorize/LoopVectorize.cpp | 42 +++-
llvm/lib/Transforms/Vectorize/VPlan.h | 45 ++++
.../Transforms/Vectorize/VPlanAnalysis.cpp | 5 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 43 ++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 96 +++++++
.../Transforms/Vectorize/VPlanTransforms.h | 11 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 +
.../Transforms/Vectorize/VPlanVerifier.cpp | 4 +-
.../Transforms/LoopVectorize/RISCV/find.ll | 236 ++++++++++++++++++
9 files changed, 480 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/find.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1d3cffa2b61bf..e28d4c45d4ab8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -393,6 +393,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<bool>
+ EnableEarlyExitWithFFLoads("enable-early-exit-with-ffload", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable vectorization of early-exit "
+ "loops with fault-only-first loads."));
+
static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
@@ -3507,6 +3513,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}
+ if (!Legal->getPotentiallyFaultingLoads().empty() && UserIC > 1) {
+ reportVectorizationFailure("Auto-vectorization of loops with potentially "
+ "faulting loads is not supported when the "
+ "interleave count is more than 1",
+ "CantInterleaveLoopWithPotentiallyFaultingLoads",
+ ORE, TheLoop);
+ return FixedScalableVFPair::getNone();
+ }
+
ScalarEvolution *SE = PSE.getSE();
ElementCount TC = getSmallConstantTripCount(SE, TheLoop);
unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
@@ -4076,6 +4091,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPReductionPHISC:
case VPDef::VPInterleaveEVLSC:
case VPDef::VPInterleaveSC:
+ case VPDef::VPWidenFFLoadSC:
case VPDef::VPWidenLoadEVLSC:
case VPDef::VPWidenLoadSC:
case VPDef::VPWidenStoreEVLSC:
@@ -4550,6 +4566,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
if (!Legal->isSafeForAnyVectorWidth())
return 1;
+ // No interleaving for potentially faulting loads.
+ if (!Legal->getPotentiallyFaultingLoads().empty())
+ return 1;
+
// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
@@ -7216,6 +7236,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
+
+ VPlanTransforms::convertFFLoadEarlyExitToVLStepping(BestVPlan);
+
// Canonicalize EVL loops after regions are dissolved.
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
@@ -7598,6 +7621,10 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Builder.insert(VectorPtr);
Ptr = VectorPtr;
}
+ if (Legal->getPotentiallyFaultingLoads().contains(I))
+ return new VPWidenFFLoadRecipe(*cast<LoadInst>(I), Ptr, &Plan.getVF(), Mask,
+ VPIRMetadata(*I, LVer), I->getDebugLoc());
+
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
VPIRMetadata(*Load, LVer), I->getDebugLoc());
@@ -8632,6 +8659,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
if (Recipe->getNumDefinedValues() == 1) {
SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
Old2New[SingleDef] = Recipe->getVPSingleValue();
+ } else if (isa<VPWidenFFLoadRecipe>(Recipe)) {
+ VPValue *Data = Recipe->getVPValue(0);
+ SingleDef->replaceAllUsesWith(Data);
+ Old2New[SingleDef] = Data;
} else {
assert(Recipe->getNumDefinedValues() == 0 &&
"Unexpected multidef recipe");
@@ -8679,6 +8710,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
+ VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(*Plan);
+
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
@@ -9869,7 +9902,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (!LVL.getPotentiallyFaultingLoads().empty()) {
+ if (EnableEarlyExitWithFFLoads) {
+ if (LVL.getPotentiallyFaultingLoads().size() > 1) {
+ reportVectorizationFailure("Auto-vectorization of loops with more than 1 "
+ "potentially faulting load is not enabled",
+ "MoreThanOnePotentiallyFaultingLoad", ORE, L);
+ return false;
+ }
+ } else if (!LVL.getPotentiallyFaultingLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting load is not supported",
"PotentiallyFaultingLoadsNotSupported", ORE, L);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f79855f7e2c5f..6e28c95ca601a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -563,6 +563,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPInterleaveEVLSC:
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPIRInstructionSC:
+ case VPRecipeBase::VPWidenFFLoadSC:
case VPRecipeBase::VPWidenLoadEVLSC:
case VPRecipeBase::VPWidenLoadSC:
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -2811,6 +2812,13 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
+ VPReductionEVLRecipe(RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp,
+ VPValue *VecOp, VPValue &EVL, VPValue *CondOp,
+ bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
+ : VPReductionRecipe(VPDef::VPReductionEVLSC, RdxKind, FMFs, nullptr,
+ ArrayRef<VPValue *>({ChainOp, VecOp, &EVL}), CondOp,
+ IsOrdered, DL) {}
+
~VPReductionEVLRecipe() override = default;
VPReductionEVLRecipe *clone() override {
@@ -3159,6 +3167,7 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenFFLoadSC ||
R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
}
@@ -3240,6 +3249,42 @@ struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
}
};
+/// A recipe for widening loads using fault-only-first intrinsics.
+/// Produces two results: (1) the loaded data, and (2) the index of the first
+/// non-dereferenceable lane, or VF if all lanes are successfully read.
+struct VPWidenFFLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
+ VPWidenFFLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *VF, VPValue *Mask,
+ const VPIRMetadata &Metadata, DebugLoc DL)
+ : VPWidenMemoryRecipe(VPDef::VPWidenFFLoadSC, Load, {Addr, VF},
+ /*Consecutive*/ true, /*Reverse*/ false, Metadata,
+ DL),
+ VPValue(this, &Load) {
+ new VPValue(nullptr, this); // Index of the first lane that faults.
+ setMask(Mask);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenFFLoadSC);
+
+ /// Return the VF operand.
+ VPValue *getVF() const { return getOperand(1); }
+ void setVF(VPValue *V) { setOperand(1, V); }
+
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getVF() || Op == getAddr();
+ }
+};
+
/// A recipe for widening load operations with vector-predication intrinsics,
/// using the address to load from, the explicit vector length and an optional
/// mask.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 46ab7712e2671..684dbd25597e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -188,8 +188,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
}
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
- assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
- "Store recipes should not define any values");
+ assert(
+ (isa<VPWidenLoadRecipe, VPWidenFFLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
+ "Store recipes should not define any values");
return cast<LoadInst>(&R->getIngredient())->getType();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8e9c3db50319f..3da8613a1d3cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -73,6 +73,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPReductionPHISC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
+ case VPWidenFFLoadSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
@@ -107,6 +108,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();
case VPWidenLoadEVLSC:
case VPWidenLoadSC:
+ case VPWidenFFLoadSC:
return true;
case VPReplicateSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
@@ -3409,6 +3411,47 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPWidenFFLoadRecipe::execute(VPTransformState &State) {
+ Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+ auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+
+ Value *VL = State.get(getVF(), VPLane(0));
+ Type *I32Ty = Builder.getInt32Ty();
+ VL = Builder.CreateZExtOrTrunc(VL, I32Ty);
+ Value *Addr = State.get(getAddr(), true);
+ Value *Mask = nullptr;
+ if (VPValue *VPMask = getMask())
+ Mask = State.get(VPMask);
+ else
+ Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
+ CallInst *NewLI =
+ Builder.CreateIntrinsic(Intrinsic::vp_load_ff, {DataTy, Addr->getType()},
+ {Addr, Mask, VL}, nullptr, "vp.op.load.ff");
+ NewLI->addParamAttr(
+ 0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
+ applyMetadata(*NewLI);
+ Value *V = cast<Instruction>(Builder.CreateExtractValue(NewLI, 0));
+ Value *NewVL = Builder.CreateExtractValue(NewLI, 1);
+ State.set(getVPValue(0), V);
+ State.set(getVPValue(1), NewVL, /*NeedsScalar=*/true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenFFLoadRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN ";
+ printAsOperand(O, SlotTracker);
+ O << ", ";
+ getVPValue(1)->printAsOperand(O, SlotTracker);
+ O << " = vp.load.ff ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
/// Use all-true mask for reverse rather than actual mask, as it avoids a
/// dependence w/o affecting the result.
static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1f6b85270607e..7e78cb6ed02ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2760,6 +2760,102 @@ void VPlanTransforms::addExplicitVectorLength(
Plan.setUF(1);
}
+void VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(VPlan &Plan) {
+ VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPWidenFFLoadRecipe *LastFFLoad = nullptr;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getVectorLoopRegion())))
+ for (VPRecipeBase &R : *VPBB)
+ if (auto *Load = dyn_cast<VPWidenFFLoadRecipe>(&R)) {
+ assert(!LastFFLoad && "Only one FFLoad is supported");
+ LastFFLoad = Load;
+ }
+
+ // Skip if no FFLoad.
+ if (!LastFFLoad)
+ return;
+
+ // Ensure FFLoad does not read past the remainder in the last iteration.
+ // Set AVL to min(VF, remainder).
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPValue *Remainder = Builder.createNaryOp(
+ Instruction::Sub, {&Plan.getVectorTripCount(), Plan.getCanonicalIV()});
+ VPValue *Cmp =
+ Builder.createICmp(CmpInst::ICMP_ULE, &Plan.getVF(), Remainder);
+ VPValue *AVL = Builder.createSelect(Cmp, &Plan.getVF(), Remainder);
+ LastFFLoad->setVF(AVL);
+
+  // To prevent branch-on-poison, rewrite the early-exit condition with a
+  // VPReductionEVLRecipe. The expected pattern here is:
+ // EMIT vp<%alt.exit.cond> = AnyOf
+ // EMIT vp<%exit.cond> = or vp<%alt.exit.cond>, vp<%main.exit.cond>
+ // EMIT branch-on-cond vp<%exit.cond>
+  auto *ExitingLatch =
+      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getExiting());
+ auto *LatchExitingBr = cast<VPInstruction>(ExitingLatch->getTerminator());
+
+ VPValue *VPAnyOf = nullptr;
+ VPValue *VecOp = nullptr;
+  // Run the matches outside the assert: VPAnyOf and VecOp are needed below
+  // even in builds where asserts are compiled out.
+  bool MatchedExit =
+      match(LatchExitingBr,
+            m_BranchOnCond(m_BinaryOr(m_VPValue(VPAnyOf), m_VPValue()))) &&
+      match(VPAnyOf, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(VecOp)));
+  assert(MatchedExit && "unexpected exiting sequence in early exit loop");
+  (void)MatchedExit;
+
+ VPValue *OpVPEVLI32 = LastFFLoad->getVPValue(1);
+ VPValue *Mask = LastFFLoad->getMask();
+ FastMathFlags FMF;
+ auto *I1Ty = Type::getInt1Ty(Plan.getContext());
+ VPValue *VPZero = Plan.getOrAddLiveIn(ConstantInt::get(I1Ty, 0));
+ DebugLoc DL = VPAnyOf->getDefiningRecipe()->getDebugLoc();
+ auto *NewAnyOf =
+ new VPReductionEVLRecipe(RecurKind::Or, FMF, VPZero, VecOp, *OpVPEVLI32,
+ Mask, /*IsOrdered*/ false, DL);
+ NewAnyOf->insertBefore(VPAnyOf->getDefiningRecipe());
+ VPAnyOf->replaceAllUsesWith(NewAnyOf);
+
+  // Using FirstActiveLane in the early-exit block is safe: the rewritten
+  // exit condition guarantees that at least one valid lane precedes any
+  // poisoned lanes.
+}
+
+void VPlanTransforms::convertFFLoadEarlyExitToVLStepping(VPlan &Plan) {
+  // Find the loop header by locating the single VPWidenFFLoadRecipe.
+ VPWidenFFLoadRecipe *LastFFLoad = nullptr;
+
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry())))
+ for (VPRecipeBase &R : *VPBB)
+ if (auto *Load = dyn_cast<VPWidenFFLoadRecipe>(&R)) {
+ assert(!LastFFLoad && "Only one FFLoad is supported");
+ LastFFLoad = Load;
+ }
+
+ // Skip if no FFLoad.
+ if (!LastFFLoad)
+ return;
+
+ VPBasicBlock *HeaderVPBB = LastFFLoad->getParent();
+  // Replace the IV step (VFxUF) with the VL returned by the FFLoad.
+ auto *CanonicalIV = cast<VPPhi>(&*HeaderVPBB->begin());
+ VPValue *Backedge = CanonicalIV->getIncomingValue(1);
+ assert(match(Backedge, m_c_Add(m_Specific(CanonicalIV),
+ m_Specific(&Plan.getVFxUF()))) &&
+ "Unexpected canonical iv");
+ VPRecipeBase *CanonicalIVIncrement = Backedge->getDefiningRecipe();
+ VPValue *OpVPEVLI32 = LastFFLoad->getVPValue(1);
+ VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+ Builder.setInsertPoint(CanonicalIVIncrement);
+ auto *TC = Plan.getTripCount();
+ Type *CanIVTy = TC->isLiveIn()
+ ? TC->getLiveInIRValue()->getType()
+ : cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
+ auto *I32Ty = Type::getInt32Ty(Plan.getContext());
+ VPValue *OpVPEVL = Builder.createScalarZExtOrTrunc(
+ OpVPEVLI32, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
+
+ CanonicalIVIncrement->setOperand(1, OpVPEVL);
+}
+
void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
// Find EVL loop entries by locating VPEVLBasedIVPHIRecipe.
// There should be only one EVL PHI in the entire plan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 69452a7e37572..bc5ce3bc43e76 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -269,6 +269,17 @@ struct VPlanTransforms {
/// (branch-on-cond eq AVLNext, 0)
static void canonicalizeEVLLoops(VPlan &Plan);
+  /// Applies to early-exit loops that use FFLoad. FFLoad may yield fewer
+  /// active lanes than VF. To prevent branch-on-poison and over-reads past
+  /// the vector trip count, use the returned VL for both stepping and the
+  /// exit computation. Implemented by:
+  ///  - adjustFFLoadEarlyExitForPoisonSafety: replace AnyOf with a
+  ///    vp.reduce.or over the first VL lanes; set AVL = min(VF, remainder).
+  ///  - convertFFLoadEarlyExitToVLStepping: after region dissolution, convert
+  ///    early-exit loops to variable-length stepping.
+ static void adjustFFLoadEarlyExitForPoisonSafety(VPlan &Plan);
+ static void convertFFLoadEarlyExitToVLStepping(VPlan &Plan);
+
/// Lower abstract recipes to concrete ones, that can be codegen'd.
static void convertToConcreteRecipes(VPlan &Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 0678bc90ef4b5..b2bc430a09686 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -40,6 +40,7 @@ class VPUser;
class VPRecipeBase;
class VPInterleaveBase;
class VPPhiAccessors;
+class VPWidenFFLoadRecipe;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -51,6 +52,7 @@ class LLVM_ABI_FOR_TEST VPValue {
friend class VPInterleaveBase;
friend class VPlan;
friend class VPExpressionRecipe;
+ friend class VPWidenFFLoadRecipe;
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -351,6 +353,7 @@ class VPDef {
VPWidenCastSC,
VPWidenGEPSC,
VPWidenIntrinsicSC,
+ VPWidenFFLoadSC,
VPWidenLoadEVLSC,
VPWidenLoadSC,
VPWidenStoreEVLSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 92caa0b4e51d5..70e6e0d006eb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -166,8 +166,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
}
return VerifyEVLUse(*R, 2);
})
- .Case<VPWidenLoadEVLRecipe, VPVectorEndPointerRecipe,
- VPInterleaveEVLRecipe>(
+ .Case<VPWidenLoadEVLRecipe, VPWidenFFLoadRecipe,
+ VPVectorEndPointerRecipe, VPInterleaveEVLRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPInstructionWithType>(
[&](const VPInstructionWithType *S) { return VerifyEVLUse(*S, 0); })
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/find.ll b/llvm/test/Transforms/LoopVectorize/RISCV/find.ll
new file mode 100644
index 0000000000000..f734bd5f53c82
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/find.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -enable-early-exit-with-ffload -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s
+
+define ptr @find_with_liveout(ptr %first, ptr %last, ptr %value) {
+; CHECK-LABEL: define ptr @find_with_liveout(
+; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]], ptr [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[FIRST4:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST3:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[FIRST2:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST1:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT6]], label %[[RETURN:.*]], label %[[FOR_BODY_LR_PH:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LAST3]], -4
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[FIRST4]]
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP6]], i64 20)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[UMAX]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[LAST1]] to i2
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[FIRST2]] to i2
+; CHECK-NEXT: [[TMP9:%.*]] = sub i2 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext i2 [[TMP9]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], [[TMP12]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[N_VEC]], [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ule i64 [[TMP12]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
+; CHECK-NEXT: [[VP_OP_LOAD_FF:%.*]] = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 [[NEXT_GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, i32 } [[VP_OP_LOAD_FF]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <vscale x 4 x i32>, i32 } [[VP_OP_LOAD_FF]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq <vscale x 4 x i32> [[TMP19]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
+; CHECK-NEXT: [[TMP23:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP20]])
+; CHECK-NEXT: [[TMP24:%.*]] = or i1 [[TMP23]], false
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[RETURN_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP21]], i1 true)
+; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[INDEX]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = mul i64 [[TMP28]], 4
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP29]]
+; CHECK-NEXT: br label %[[RETURN_LOOPEXIT]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[FOR_BODY_LR_PH]] ], [ [[FIRST]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[FIRST_ADDR_07:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[FIRST_ADDR_07]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP31]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN_LOOPEXIT]], label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[FIRST_ADDR_07]], i64 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[RETURN_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[RETURN_LOOPEXIT]]:
+; CHECK-NEXT: [[RETVAL_0_PH:%.*]] = phi ptr [ [[FIRST_ADDR_07]], %[[FOR_BODY]] ], [ [[LAST]], %[[FOR_INC]] ], [ [[LAST]], %[[MIDDLE_BLOCK]] ], [ [[TMP30]], %[[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[RETVAL_0_PH]], %[[RETURN_LOOPEXIT]] ]
+; CHECK-NEXT: ret ptr [[RETVAL_0]]
+;
+entry:
+ %cmp.not6 = icmp eq ptr %first, %last
+ br i1 %cmp.not6, label %return, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %0 = load i32, ptr %value, align 4
+ br label %for.body
+
+for.body:
+ %first.addr.07 = phi ptr [ %first, %for.body.lr.ph ], [ %incdec.ptr, %for.inc ]
+ %1 = load i32, ptr %first.addr.07, align 4
+ %cmp1 = icmp eq i32 %1, %0
+ br i1 %cmp1, label %return.loopexit, label %for.inc
+
+for.inc:
+ %incdec.ptr = getelementptr inbounds i32, ptr %first.addr.07, i64 1
+ %cmp.not = icmp eq ptr %incdec.ptr, %last
+ br i1 %cmp.not, label %return.loopexit, label %for.body
+
+return.loopexit:
+ %retval.0.ph = phi ptr [ %first.addr.07, %for.body ], [ %last, %for.inc ]
+ br label %return
+
+return:
+ %retval.0 = phi ptr [ %first, %entry ], [ %retval.0.ph, %return.loopexit ]
+ ret ptr %retval.0
+}
+
+define i32 @find_without_liveout(ptr %first, ptr %last, ptr %value) {
+; CHECK-LABEL: define i32 @find_without_liveout(
+; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]], ptr [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[FIRST4:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST3:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[FIRST2:%.*]] = ptrtoint ptr [[FIRST]] to i64
+; CHECK-NEXT: [[LAST1:%.*]] = ptrtoint ptr [[LAST]] to i64
+; CHECK-NEXT: [[CMP_NOT6:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT6]], label %[[RETURN:.*]], label %[[FOR_BODY_LR_PH:.*]]
+; CHECK: [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VALUE]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LAST3]], -4
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[FIRST4]]
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP6]], i64 15)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[UMAX]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[LAST1]] to i2
+; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[FIRST2]] to i2
+; CHECK-NEXT: [[TMP9:%.*]] = sub i2 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = zext i2 [[TMP9]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], [[TMP12]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[N_VEC]], [[INDEX]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ule i64 [[TMP12]], [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i64 [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
+; CHECK-NEXT: [[VP_OP_LOAD_FF:%.*]] = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr align 4 [[NEXT_GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, i32 } [[VP_OP_LOAD_FF]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <vscale x 4 x i32>, i32 } [[VP_OP_LOAD_FF]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = icmp eq <vscale x 4 x i32> [[TMP19]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
+; CHECK-NEXT: [[TMP23:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, <vscale x 4 x i1> [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP20]])
+; CHECK-NEXT: [[TMP24:%.*]] = or i1 [[TMP23]], false
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i1 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[RETURN_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: br label %[[RETURN_LOOPEXIT]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[FOR_BODY_LR_PH]] ], [ [[FIRST]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[FIRST_ADDR_07:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[FIRST_ADDR_07]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP27]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN_LOOPEXIT]], label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, ptr [[FIRST_ADDR_07]], i64 1
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[LAST]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[RETURN_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[RETURN_LOOPEXIT]]:
+; CHECK-NEXT: [[RETVAL_0_PH:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ 1, %[[FOR_INC]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RETVAL_0_PH]], %[[RETURN_LOOPEXIT]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %cmp.not6 = icmp eq ptr %first, %last
+ br i1 %cmp.not6, label %return, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %0 = load i32, ptr %value, align 4
+ br label %for.body
+
+for.body:
+ %first.addr.07 = phi ptr [ %first, %for.body.lr.ph ], [ %incdec.ptr, %for.inc ]
+ %1 = load i32, ptr %first.addr.07, align 4
+ %cmp1 = icmp eq i32 %1, %0
+ br i1 %cmp1, label %return.loopexit, label %for.inc
+
+for.inc:
+ %incdec.ptr = getelementptr inbounds i32, ptr %first.addr.07, i64 1
+ %cmp.not = icmp eq ptr %incdec.ptr, %last
+ br i1 %cmp.not, label %return.loopexit, label %for.body
+
+return.loopexit:
+ %retval.0.ph = phi i32 [ 0, %for.body ], [ 1, %for.inc ]
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 0, %entry ], [ %retval.0.ph, %return.loopexit ]
+ ret i32 %retval.0
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.