[llvm] [LV] Initial support for stores in early exit loops (PR #137774)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 04:02:52 PDT 2025
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/137774
>From 620f8797ebd6ea9a2700d20cf01106a3e3b201f2 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 4 Apr 2025 15:44:52 +0000
Subject: [PATCH 1/4] [LV] Initial support for stores in early exit loops
Adds basic support for vectorizing a simple early exit loop that contains a
store. The loop is vectorized such that, when the next vector iteration would
take the early exit, we bail out to the scalar loop to handle the exit.
---
.../Vectorize/LoopVectorizationLegality.h | 19 ++
.../Vectorize/LoopVectorizationLegality.cpp | 125 ++++++++++-
.../Transforms/Vectorize/LoopVectorize.cpp | 23 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 17 ++
.../Transforms/Vectorize/VPlanPatternMatch.h | 44 ++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 197 ++++++++++++++----
.../Transforms/Vectorize/VPlanTransforms.h | 2 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 2 +
.../Transforms/Vectorize/VPlanVerifier.cpp | 4 +-
.../AArch64/simple_early_exit.ll | 78 +++++++
.../Transforms/LoopVectorize/control-flow.ll | 2 +-
.../LoopVectorize/early_exit_legality.ll | 188 ++++++++++++++++-
13 files changed, 652 insertions(+), 53 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..1c153a203d4ec 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -407,6 +407,15 @@ class LoopVectorizationLegality {
return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
}
+ /// Returns true if the early exit condition must be copied into the vector preheader (set for early exit loops containing a store).
+ bool isConditionCopyRequired() const {
+ return RequiresEarlyExitConditionCopy;
+ }
+
+ /// Returns the load instruction, if any, that feeds the uncountable early
+ /// exit condition.
+ std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
+
/// Return true if there is store-load forwarding dependencies.
bool isSafeForAnyStoreLoadForwardDistances() const {
return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -654,6 +663,16 @@ class LoopVectorizationLegality {
/// Keep track of the loop edge to an uncountable exit, comprising a pair
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+
+ /// Indicates that we will need to copy the early exit condition into
+ /// the vector preheader, as we will need to mask some operations in
+ /// the loop (e.g. stores).
+ bool RequiresEarlyExitConditionCopy = false;
+
+ /// The load used to determine an uncountable early-exit condition. This is
+ /// only used to allow further analysis in canVectorizeMemory if we found
+ /// what looks like a valid early exit loop with store beforehand.
+ std::optional<LoadInst *> EarlyExitLoad;
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 8e09e6f8d4935..71a1770cf7c75 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1209,6 +1210,36 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
});
}
+ // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+ // since we're (potentially) doing the load out of its normal order
+ // in the loop and that may throw off dependency checking.
+ // A forward dependency should be fine, but a backward dependency may
+ // be unsafe even if LAA considers it safe, because the load for
+ // vector iteration i+1 is performed in vector iteration i.
+ if (isConditionCopyRequired()) {
+ assert(EarlyExitLoad.has_value() && "EE Store without condition load.");
+
+ if (LAI->canVectorizeMemory()) {
+ const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+ const auto *Deps = DepChecker.getDependences();
+
+ for (const MemoryDepChecker::Dependence &Dep : *Deps) {
+ if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
+ Dep.getSource(DepChecker) == EarlyExitLoad) {
+ // FIXME: Refine the wording of this remark. The restriction currently
+ // only applies when a store is present in the early exit loop.
+ reportVectorizationFailure(
+ "No dependencies allowed for early exit condition load",
+ "Early exit condition loads may not have a dependence with "
+ "another"
+ " memory operation.",
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+ return false;
+ }
+ }
+ }
+ }
+
if (!LAI->canVectorizeMemory())
return canVectorizeIndirectUnsafeDependences();
@@ -1627,6 +1658,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
// Keep a record of all the exiting blocks.
SmallVector<const SCEVPredicate *, 4> Predicates;
std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
+ std::optional<LoadInst *> EELoad;
for (BasicBlock *BB : ExitingBlocks) {
const SCEV *EC =
PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
@@ -1656,6 +1688,21 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
return false;
}
+ // For the benefit of loops with stores, record the load feeding the exit
+ // condition so that it can be analyzed by
+ // isDereferenceableAndAlignedInLoop and later by dependence analysis.
+ if (BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // FIXME: Handle exit conditions with multiple users, and exit conditions
+ // more complex than br(icmp(load, loop_inv)).
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+ if (Cmp && Cmp->hasOneUse() &&
+ TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+ LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+ if (Load && Load->hasOneUse() && TheLoop->contains(Load))
+ EELoad = Load;
+ }
+ }
+
SingleUncountableEdge = {BB, ExitBlock};
} else
CountableExitingBlocks.push_back(BB);
@@ -1708,16 +1755,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
};
+ bool HasStore = false;
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ HasStore = true;
+ if (SI->isSimple())
+ continue;
+
+ reportVectorizationFailure(
+ "Complex writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with complex writes to memory",
+ "WritesInEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+
if (I.mayWriteToMemory()) {
// We don't support writes to memory.
reportVectorizationFailure(
- "Writes to memory unsupported in early exit loops",
- "Cannot vectorize early exit loop with writes to memory",
+ "Complex writes to memory unsupported in early exit loops",
+ "Cannot vectorize early exit loop with complex writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
- } else if (!IsSafeOperation(&I)) {
+ }
+
+ if (!IsSafeOperation(&I)) {
reportVectorizationFailure("Early exit loop contains operations that "
"cannot be speculatively executed",
"UnsafeOperationsEarlyExitLoop", ORE,
@@ -1732,13 +1794,53 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
// TODO: Handle loops that may fault.
Predicates.clear();
- if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
- &Predicates)) {
+
+ if (HasStore && EELoad.has_value()) {
+ LoadInst *LI = *EELoad;
+ if (isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(), *DT, AC,
+ &Predicates)) {
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(TheLoop);
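+ // FIXME: We may have multiple levels of conditional loads, so will need to improve on outright rejection at some point.
+ // NOTE(review): unlike the sibling failure paths, this one does not `return false` after reporting -- confirm whether that is intended.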
+ if (!SafetyInfo.isGuaranteedToExecute(*LI, DT, TheLoop)) {
+ LLVM_DEBUG(
+ dbgs() << "Early exit condition load not guaranteed to execute.\n");
+ reportVectorizationFailure(
+ "Early exit condition load not guaranteed to execute",
+ "Cannot vectorize early exit loop when condition load is not "
+ "guaranteed to execute",
+ "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
+ reportVectorizationFailure("Uncounted loop condition not known safe",
+ "Cannot vectorize early exit loop with "
+ "possibly unsafe condition load",
+ "PotentiallyFaultingEarlyExitLoop", ORE,
+ TheLoop);
+ return false;
+ }
+ } else if (HasStore) {
+ LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
reportVectorizationFailure(
- "Loop may fault",
- "Cannot vectorize potentially faulting early exit loop",
- "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ "Early exit loop with store but no condition load",
+ "Cannot vectorize early exit loop with store but no condition load",
+ "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
return false;
+ } else {
+ // Read-only loop.
+ // FIXME: as with the loops with stores, only the loads contributing to
+ // the loop condition need to be guaranteed dereferenceable and
+ // aligned.
+ if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+ &Predicates)) {
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
}
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1751,6 +1853,11 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"backedge taken count: "
<< *SymbolicMaxBTC << '\n');
UncountableEdge = SingleUncountableEdge;
+ if (HasStore) {
+ RequiresEarlyExitConditionCopy = true;
+ EarlyExitLoad = EELoad;
+ }
+
return true;
}
@@ -1823,6 +1930,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
} else {
if (!isVectorizableEarlyExitLoop()) {
UncountableEdge = std::nullopt;
+ EarlyExitLoad = std::nullopt;
+ RequiresEarlyExitConditionCopy = false;
if (DoExtraAnalysis)
Result = false;
else
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7ad02956a5b69..dd734b231fb6c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2530,8 +2530,10 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
for (auto &R : make_early_inc_range(*VPBB)) {
- assert(!R.isPhi() && "Tried to move phi recipe to end of block");
- R.moveBefore(*IRVPBB, IRVPBB->end());
+ if (R.isPhi())
+ R.moveBefore(*IRVPBB, IRVPBB->getFirstNonPhi());
+ else
+ R.moveBefore(*IRVPBB, IRVPBB->end());
}
VPBlockUtils::reassociateBlocks(VPBB, IRVPBB);
@@ -9100,6 +9102,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
*Plan, CM.getMinimalBitwidths());
VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+
+ // See if we can convert an early exit vplan to bail out to a scalar
+ // loop if state-changing operations (like stores) are present and
+ // an exit will be taken in the next vector iteration.
+ // If not, discard the plan.
+ if (Legal->isConditionCopyRequired() && !HasScalarVF &&
+ !VPlanTransforms::runPass(VPlanTransforms::tryEarlyExitConversion,
+ *Plan))
+ break;
// TODO: try to put it close to addActiveLaneMask().
// Discard the plan if it is not EVL-compatible
if (CM.foldTailWithEVL() && !HasScalarVF &&
@@ -9380,6 +9391,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
Range);
DenseMap<const VPBlockBase *, BasicBlock *> VPB2IRBB;
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+ // FIXME: Better place to put this? Or maybe an enum for how to handle
+ // early exits?
+ if (Legal->hasUncountableEarlyExit())
+ Plan->setEarlyExitContinuesInScalarLoop(Legal->isConditionCopyRequired());
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop,
@@ -9681,6 +9696,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
DenseMap<const VPBlockBase *, BasicBlock *> VPB2IRBB;
auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+ // FIXME: Better place to put this? Or maybe an enum for how to handle
+ // early exits?
+ if (Legal->hasUncountableEarlyExit())
+ Plan->setEarlyExitContinuesInScalarLoop(Legal->isConditionCopyRequired());
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 2c4cac7655ec9..2b7ab5a4254dd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3636,6 +3636,13 @@ class VPlan {
/// VPlan is destroyed.
SmallVector<VPBlockBase *> CreatedBlocks;
+ /// Indicates that an early exit loop will exit before the condition is
+ /// reached, and that the scalar loop must perform the last few iterations.
+ /// FIXME: Is this the right place? We mainly want to make sure that we
+ /// know about this for transforming the plan to copy&move the exit
+ /// condition, but maybe it doesn't need to be in the plan itself.
+ bool EarlyExitContinuesInScalarLoop = false;
+
/// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
/// wrapping the original header of the scalar loop.
VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
@@ -3939,6 +3946,16 @@ class VPlan {
return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1;
}
+ /// Returns true if all exit paths should reach the scalar loop.
+ bool shouldEarlyExitContinueInScalarLoop() const {
+ return EarlyExitContinuesInScalarLoop;
+ }
+
+ /// Set whether early exit vectorization always continues in the scalar loop.
+ void setEarlyExitContinuesInScalarLoop(bool Continues) {
+ EarlyExitContinuesInScalarLoop = Continues;
+ }
+
/// Returns true if the scalar tail may execute after the vector loop. Note
/// that this relies on unneeded branches to the scalar tail loop being
/// removed.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f2a7f16e19a79..3b3e2415d7b00 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -41,6 +41,17 @@ template <typename Class> struct class_match {
/// Match an arbitrary VPValue and ignore it.
inline class_match<VPValue> m_VPValue() { return class_match<VPValue>(); }
+struct loop_invariant_vpvalue {
+ template <typename ITy> bool match(ITy *V) const {
+ VPValue *Val = dyn_cast<VPValue>(V);
+ return Val && Val->isDefinedOutsideLoopRegions();
+ }
+};
+
+inline loop_invariant_vpvalue m_LoopInvVPValue() {
+ return loop_invariant_vpvalue();
+}
+
template <typename Class> struct bind_ty {
Class *&VR;
@@ -324,6 +335,12 @@ m_Not(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::Not>(Op0);
}
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::BranchOnCond>
m_BranchOnCond(const Op0_t &Op0) {
@@ -431,6 +448,19 @@ inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
return GEPLikeRecipe_match<Op0_t, Op1_t>(Op0, Op1);
}
+// FIXME: Separate Commutative matcher? Share result type?
+// FIXME: Are there other recipe types for ICmp?
+template <typename Op0_t, typename Op1_t>
+using ICmpRecipe_match =
+ BinaryRecipe_match<Op0_t, Op1_t, Instruction::ICmp, false, VPWidenRecipe,
+ VPReplicateRecipe>;
+
+template <typename Op0_t, typename Op1_t>
+inline ICmpRecipe_match<Op0_t, Op1_t> m_ICmp(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return ICmpRecipe_match<Op0_t, Op1_t>(Op0, Op1);
+}
+
template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
using AllTernaryRecipe_match =
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,
@@ -581,6 +611,20 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}
+template <typename SubPattern_t> struct OneUse_match {
+ SubPattern_t SubPattern;
+
+ OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ return V->hasOneUse() && SubPattern.match(V);
+ }
+};
+
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
+ return SubPattern;
+}
+
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3c7ab7d24bf6d..f8b1ff5d77ec7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -276,7 +276,9 @@ InstructionCost VPRecipeBase::computeCost(ElementCount VF,
bool VPRecipeBase::isPhi() const {
return (getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC) ||
(isa<VPInstruction>(this) &&
- cast<VPInstruction>(this)->getOpcode() == Instruction::PHI) ||
+ (cast<VPInstruction>(this)->getOpcode() == Instruction::PHI ||
+ cast<VPInstruction>(this)->getOpcode() ==
+ VPInstruction::ResumePhi)) ||
isa<VPIRPhi>(this);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 806c20ef8cf73..93016da928a54 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2493,48 +2493,52 @@ void VPlanTransforms::handleUncountableEarlyExit(
// block if CondToEarlyExit.
VPValue *IsEarlyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
- VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
- VPBasicBlock *VectorEarlyExitVPBB =
- Plan.createVPBasicBlock("vector.early.exit");
- VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
- VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
- NewMiddle->swapSuccessors();
-
- VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
-
- // Update the exit phis in the early exit block.
- VPBuilder MiddleBuilder(NewMiddle);
- VPBuilder EarlyExitB(VectorEarlyExitVPBB);
- for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
- auto *ExitIRI = cast<VPIRPhi>(&R);
- // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
- // a single predecessor and 1 if it has two.
- unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
- if (ExitIRI->getNumOperands() != 1) {
- // The first of two operands corresponds to the latch exit, via MiddleVPBB
- // predecessor. Extract its last lane.
- ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
- }
- VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
- auto IsVector = [](ElementCount VF) { return VF.isVector(); };
- // When the VFs are vectors, need to add `extract` to get the incoming value
- // from early exit. When the range contains scalar VF, limit the range to
- // scalar VF to prevent mis-compilation for the range containing both scalar
- // and vector VFs.
- if (!IncomingFromEarlyExit->isLiveIn() &&
- LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
- // Update the incoming value from the early exit.
- VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
- VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
- "first.active.lane");
- IncomingFromEarlyExit = EarlyExitB.createNaryOp(
- Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
- nullptr, "early.exit.value");
- ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+ if (!Plan.shouldEarlyExitContinueInScalarLoop()) {
+ VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
+ VPBasicBlock *VectorEarlyExitVPBB =
+ Plan.createVPBasicBlock("vector.early.exit");
+ VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
+ VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
+ NewMiddle->swapSuccessors();
+
+ VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
+
+ // Update the exit phis in the early exit block.
+ VPBuilder MiddleBuilder(NewMiddle);
+ VPBuilder EarlyExitB(VectorEarlyExitVPBB);
+ for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+ auto *ExitIRI = cast<VPIRPhi>(&R);
+ // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
+ // a single predecessor and 1 if it has two.
+ unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
+ if (ExitIRI->getNumOperands() != 1) {
+ // The first of two operands corresponds to the latch exit, via
+ // MiddleVPBB predecessor. Extract its last lane.
+ ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
+ }
+
+ VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
+ auto IsVector = [](ElementCount VF) { return VF.isVector(); };
+ // When the VFs are vectors, need to add `extract` to get the incoming
+ // value from early exit. When the range contains scalar VF, limit the
+ // range to scalar VF to prevent mis-compilation for the range containing
+ // both scalar and vector VFs.
+ if (!IncomingFromEarlyExit->isLiveIn() &&
+ LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
+ // Update the incoming value from the early exit.
+ VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
+ VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
+ "first.active.lane");
+ IncomingFromEarlyExit =
+ EarlyExitB.createNaryOp(Instruction::ExtractElement,
+ {IncomingFromEarlyExit, FirstActiveLane},
+ nullptr, "early.exit.value");
+ ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+ }
}
+ MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
}
- MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
// Replace the condition controlling the non-early exit from the vector loop
// with one exiting if either the original condition of the vector latch is
@@ -2551,6 +2555,121 @@ void VPlanTransforms::handleUncountableEarlyExit(
LatchExitingBranch->eraseFromParent();
}
+bool VPlanTransforms::tryEarlyExitConversion(VPlan &Plan) {
+ // Returning false lets the VPlan be abandoned entirely, so bail out
+ // gracefully (rather than crashing) if earlier assumptions made on the
+ // scalar IR don't hold for the VPlan version of the loop.
+ if (Plan.hasScalarVFOnly())
+ return false;
+ auto *Region = Plan.getVectorLoopRegion();
+ using namespace llvm::VPlanPatternMatch;
+ VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+
+ // Find the uncounted loop exit condition.
+ VPValue *Uncounted = nullptr;
+ if (!match(Region->getExitingBasicBlock()->getTerminator(),
+ m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+ m_OneUse(m_AnyOf(m_VPValue(Uncounted))), m_VPValue())))))
+ return false;
+
+ // FIXME: Copy while scanning through IR; no need to save into a list, and
+ // we avoid problems with cloning differing recipe types.
+
+ // Extract the IR needed to create the uncountable exit condition.
+ // Looking for br(or(any_of(icmp(load(gep(base, iv)), loop_inv)), counted))
+ // FIXME: Build a list of nodes to copy below instead of matching
+ // the exact pattern.
+ // FIXME: We should be able to handle multiple users for at least some of
+ // these nodes; requires creating phis.
+ // FIXME: This does feel a bit fragile; is it better to do this earlier
+ // when creating the initial recipe based on the scalar IR, instead
+ // of the vplan equivalent here?
+ // FIXME: New vplan pattern matchers; m_Load, m_ICmp, m_OneUse, etc.
+ auto *Cmp = dyn_cast<VPWidenRecipe>(Uncounted);
+ if (!Cmp || !Cmp->hasOneUse() || Cmp->getOpcode() != Instruction::ICmp ||
+ !Cmp->getOperand(1)->isDefinedOutsideLoopRegions())
+ return false;
+ auto *Load = dyn_cast<VPWidenLoadRecipe>(Cmp->getOperand(0));
+ if (!Load || !Load->hasOneUse() || !Load->isConsecutive())
+ return false;
+ auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Load->getAddr());
+ if (!VecPtr || !VecPtr->hasOneUse())
+ return false;
+
+ VPReplicateRecipe *GEP = dyn_cast<VPReplicateRecipe>(VecPtr->getOperand(0));
+ if (!GEP || !match(GEP, m_GetElementPtr(
+ m_LoopInvVPValue(),
+ m_ScalarIVSteps(m_Specific(IV), m_SpecificInt(1),
+ m_Specific(&Plan.getVF())))))
+ return false;
+
+ VPInstruction *IVUpdate = dyn_cast<VPInstruction>(IV->getBackedgeValue());
+ if (!IVUpdate)
+ return false;
+
+ // Duplicate exit IR and use the starting value for the IV phi.
+ auto *VectorPH = Plan.getVectorPreheader();
+ VPBuilder PHBuilder(VectorPH, VectorPH->getFirstNonPhi());
+ VPReplicateRecipe *PHGEP = GEP->clone();
+ PHGEP->setOperand(1, IV->getStartValue());
+ PHBuilder.insert(PHGEP);
+ VPVectorPointerRecipe *PHVecPtr = VecPtr->clone();
+ PHVecPtr->setOperand(0, PHGEP);
+ PHBuilder.insert(PHVecPtr);
+ VPWidenLoadRecipe *PHLoad = Load->clone();
+ PHLoad->setOperand(0, PHVecPtr);
+ PHBuilder.insert(PHLoad);
+ VPWidenRecipe *PHCmp = Cmp->clone();
+ PHCmp->setOperand(0, PHLoad);
+ PHBuilder.insert(PHCmp);
+
+ // Split vector preheader to form a new bypass block.
+ VPBasicBlock *NewPH = VectorPH->splitAt(PHBuilder.getInsertPoint());
+ VPBasicBlock *ScalarPH = Plan.getScalarPreheader();
+ VPValue *PHAnyOf = PHBuilder.createNaryOp(VPInstruction::AnyOf, {PHCmp});
+ PHBuilder.createNaryOp(VPInstruction::BranchOnCond, {PHAnyOf},
+ PHCmp->getDebugLoc());
+ VectorPH->clearSuccessors();
+ VectorPH->setTwoSuccessors(ScalarPH, NewPH);
+
+ // Fix up the resume phi in scalar preheader -- we might not have reached
+ // the calculated maximum vector tripcount, so just use the next value of IV.
+ // FIXME: Can we rely on the resume phi being first?
+ // Might need a map. For now, grab the phis in the block and abandon
+ // if there's more than one.
+ VPInstruction *ResumePHI = nullptr;
+ for (VPRecipeBase &PHI : ScalarPH->phis()) {
+ if (ResumePHI)
+ return false;
+ ResumePHI = dyn_cast<VPInstruction>(&PHI);
+ }
+ if (!ResumePHI || ResumePHI->getOpcode() != VPInstruction::ResumePhi)
+ return false;
+ VPBasicBlock *MiddleBlock = Plan.getMiddleBlock();
+ ScalarPH->clearPredecessors();
+ ScalarPH->setPredecessors({MiddleBlock, VectorPH});
+ ResumePHI->addOperand(ResumePHI->getOperand(1));
+ ResumePHI->setOperand(0, IVUpdate);
+
+ // Move the IV update, if necessary (and safe), then update the index
+ // operand of the GEP so that we load the next vector iteration's exit
+ // condition data.
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+ if (!VPDT.properlyDominates(IVUpdate, GEP))
+ IVUpdate->moveBefore(*GEP->getParent(), GEP->getIterator());
+ GEP->setOperand(1, IVUpdate);
+
+ // Update middle block branch to use IVUpdate vs. the full trip count,
+ // since we may be exiting the vector loop early.
+ VPRecipeBase *OldTerminator = MiddleBlock->getTerminator();
+ VPBuilder MBBuilder(OldTerminator);
+ VPValue *FullTC =
+ MBBuilder.createICmp(CmpInst::ICMP_EQ, IVUpdate, Plan.getTripCount());
+ OldTerminator->setOperand(0, FullTC);
+ return true;
+}
+
void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index d284d916633c8..18841aa2055f9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -162,6 +162,8 @@ struct VPlanTransforms {
tryAddExplicitVectorLength(VPlan &Plan,
const std::optional<unsigned> &MaxEVLSafeElements);
+ static bool tryEarlyExitConversion(VPlan &Plan);
+
// For each Interleave Group in \p InterleaveGroups replace the Recipes
// widening its memory instructions with a single VPInterleaveRecipe at its
// insertion point.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 638156eab7a84..7805bdf20e6d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -152,6 +152,8 @@ class VPValue {
return Current != user_end();
}
+ bool hasOneUse() const { return Users.size() == 1; }
+
void replaceAllUsesWith(VPValue *New);
/// Go through the uses list for this VPValue and make each use point to \p
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index b8205545a4f5e..bf4559b758223 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -225,7 +225,9 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPredInstPHIRecipe,
VPIRPhi>(UI) ||
(isa<VPInstruction>(UI) &&
- cast<VPInstruction>(UI)->getOpcode() == Instruction::PHI))
+ (cast<VPInstruction>(UI)->getOpcode() == Instruction::PHI ||
+ cast<VPInstruction>(UI)->getOpcode() ==
+ VPInstruction::ResumePhi)))
continue;
// If the user is in the same block, check it comes after R in the
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 74b0c2c0e033a..1e9b55d9e560d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -575,6 +575,82 @@ loop.end:
ret i64 %retval
}
+define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: define void @loop_contains_store_single_user(
+; CHECK-SAME: ptr noalias dereferenceable(40) [[ARRAY:%.*]], ptr readonly align 2 dereferenceable(40) [[PRED:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], splat (i16 500)
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH]], label [[VECTOR_PH_SPLIT:%.*]]
+; CHECK: vector.ph.split:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_SPLIT]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i16, ptr [[ARRAY]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i16> [[WIDE_LOAD1]], splat (i16 1)
+; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP5]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 [[INDEX_NEXT]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP7]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2
+; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD2]], splat (i16 500)
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
+; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
+; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_PH]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT: [[ST_ADDR:%.*]] = getelementptr inbounds nuw i16, ptr [[ARRAY]], i64 [[IV]]
+; CHECK-NEXT: [[DATA:%.*]] = load i16, ptr [[ST_ADDR]], align 2
+; CHECK-NEXT: [[INC:%.*]] = add nsw i16 [[DATA]], 1
+; CHECK-NEXT: store i16 [[INC]], ptr [[ST_ADDR]], align 2
+; CHECK-NEXT: [[EE_ADDR:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 [[IV]]
+; CHECK-NEXT: [[EE_VAL:%.*]] = load i16, ptr [[EE_ADDR]], align 2
+; CHECK-NEXT: [[EE_COND:%.*]] = icmp sgt i16 [[EE_VAL]], 500
+; CHECK-NEXT: br i1 [[EE_COND]], label [[EXIT]], label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[COUNTED_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 20
+; CHECK-NEXT: br i1 [[COUNTED_COND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
declare i32 @foo(i32) readonly
declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
@@ -595,4 +671,6 @@ attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..2578260fe878d 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
; return 0;
; }
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
; CHECK: remark: source.cpp:5:9: loop not vectorized
; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index de455c81d363e..6eb9fc2adeb70 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -445,7 +445,7 @@ loop.end:
define i64 @loop_contains_store(ptr %dest) {
; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
entry:
%p1 = alloca [1024 x i8]
call void @init_mem(ptr %p1, i64 1024)
@@ -470,6 +470,192 @@ loop.end:
ret i64 %retval
}
+define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_single_user'
+; CHECK: LV: We can vectorize this loop!
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_multi_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_multi_user'
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ %unused = add i16 %ee.val, 42
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_fcmp(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp'
+; CHECK: LV: Not vectorizing: Early exit loop with store but no condition load.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw half, ptr %pred, i64 %iv
+ %ee.val = load half, ptr %ee.addr, align 2
+ %ee.cond = fcmp ugt half %ee.val, 500.0
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
+; CHECK: LV: Not vectorizing: No dependencies allowed for early exit condition load.
+entry:
+ %forward = getelementptr i16, ptr %pred, i64 -8
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ %some.addr = getelementptr inbounds nuw i16, ptr %forward, i64 %iv
+ store i16 42, ptr %some.addr, align 2
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
+; CHECK: LV: Not vectorizing: Uncounted loop condition not known safe.
+entry:
+ %n_bytes = mul nuw nsw i32 %n, 2
+ call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
+ %tc = sext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, %tc
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
+; CHECK: LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store volatile i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+define void @exit_conditions_combined(ptr noalias dereferenceable(40) %array, ptr readonly align 2 dereferenceable(40) %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'exit_conditions_combined'
+; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+ %data = load i16, ptr %st.addr, align 2
+ %inc = add nsw i16 %data, 1
+ store i16 %inc, ptr %st.addr, align 2
+ %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+ %ee.val = load i16, ptr %ee.addr, align 2
+ %ee.cond = icmp sgt i16 %ee.val, 500
+ %iv.next = add nuw nsw i64 %iv, 1
+ %counted.cond = icmp eq i64 %iv.next, 20
+ %or.cond = select i1 %ee.cond, i1 true, i1 %counted.cond
+ br i1 %or.cond, label %exit, label %for.body
+
+exit: ; preds = %for.body
+ ret void
+}
define i64 @uncountable_exit_in_conditional_block(ptr %mask) {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block'
>From d1e60620af11335592f065b1b337234ea665bafe Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 14 May 2025 15:06:39 +0000
Subject: [PATCH 2/4] Remove redundant variable, add helper to clear ee state,
change load check to !invariant
---
.../Vectorize/LoopVectorizationLegality.h | 17 +++++++++++------
.../Vectorize/LoopVectorizationLegality.cpp | 10 +++-------
2 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 1c153a203d4ec..c98b50702ed66 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -409,7 +409,7 @@ class LoopVectorizationLegality {
/// Returns true if this is an early exit loop containing a store.
bool isConditionCopyRequired() const {
- return RequiresEarlyExitConditionCopy;
+ return EarlyExitLoad.has_value();
}
/// Returns the load instruction, if any, nearest to an uncountable early
@@ -545,6 +545,12 @@ class LoopVectorizationLegality {
/// additional cases safely.
bool isVectorizableEarlyExitLoop();
+ /// Clears early exit data gathered so far; used when a check fails.
+ void clearEarlyExitData() {
+ UncountableEdge = std::nullopt;
+ EarlyExitLoad = std::nullopt;
+ }
+
/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -664,14 +670,13 @@ class LoopVectorizationLegality {
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
- /// Indicates that we will need to copy the early exit condition into
- /// the vector preheader, as we will need to mask some operations in
- /// the loop (e.g. stores).
- bool RequiresEarlyExitConditionCopy = false;
-
/// The load used to determine an uncountable early-exit condition. This is
/// only used to allow further analysis in canVectorizeMemory if we found
/// what looks like a valid early exit loop with store beforehand.
+ ///
+ /// Also indicates that we will need to copy the early exit condition into
+ /// the vector preheader, as we will need to mask some operations in
+ /// the loop (e.g. stores) or bail out to a scalar loop.
std::optional<LoadInst *> EarlyExitLoad;
};
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 71a1770cf7c75..4b2d341f910ec 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1698,7 +1698,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
if (Cmp && Cmp->hasOneUse() &&
TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
- if (Load && Load->hasOneUse() && TheLoop->contains(Load))
+ if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load))
EELoad = Load;
}
}
@@ -1853,10 +1853,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"backedge taken count: "
<< *SymbolicMaxBTC << '\n');
UncountableEdge = SingleUncountableEdge;
- if (HasStore) {
- RequiresEarlyExitConditionCopy = true;
+ if (HasStore)
EarlyExitLoad = EELoad;
- }
return true;
}
@@ -1929,9 +1927,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
} else {
if (!isVectorizableEarlyExitLoop()) {
- UncountableEdge = std::nullopt;
- EarlyExitLoad = std::nullopt;
- RequiresEarlyExitConditionCopy = false;
+ clearEarlyExitData();
if (DoExtraAnalysis)
Result = false;
else
>From 5ea9208a2257be448658554b391fc0175e2c1d5b Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 14 May 2025 15:43:49 +0000
Subject: [PATCH 3/4] Move exit load checks after store detection
---
.../Vectorize/LoopVectorizationLegality.cpp | 93 ++++++++++---------
1 file changed, 47 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 4b2d341f910ec..7fae9ef0f5828 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1658,7 +1658,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
// Keep a record of all the exiting blocks.
SmallVector<const SCEVPredicate *, 4> Predicates;
std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
- std::optional<LoadInst *> EELoad;
for (BasicBlock *BB : ExitingBlocks) {
const SCEV *EC =
PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
@@ -1688,21 +1687,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
return false;
}
- // For loops with stores.
- // Record load for analysis by isDereferenceableAndAlignedInLoop
- // and later by dependence analysis.
- if (BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator())) {
- // FIXME: Handle exit conditions with multiple users, more complex exit
- // conditions than br(icmp(load, loop_inv)).
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
- if (Cmp && Cmp->hasOneUse() &&
- TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
- LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
- if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load))
- EELoad = Load;
- }
- }
-
SingleUncountableEdge = {BB, ExitBlock};
} else
CountableExitingBlocks.push_back(BB);
@@ -1795,39 +1779,56 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
// TODO: Handle loops that may fault.
Predicates.clear();
- if (HasStore && EELoad.has_value()) {
- LoadInst *LI = *EELoad;
- if (isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(), *DT, AC,
- &Predicates)) {
- ICFLoopSafetyInfo SafetyInfo;
- SafetyInfo.computeLoopSafetyInfo(TheLoop);
- // FIXME: We may have multiple levels of conditional loads, so will
- // need to improve on outright rejection at some point.
- if (!SafetyInfo.isGuaranteedToExecute(*LI, DT, TheLoop)) {
- LLVM_DEBUG(
- dbgs() << "Early exit condition load not guaranteed to execute.\n");
- reportVectorizationFailure(
- "Early exit condition load not guaranteed to execute",
- "Cannot vectorize early exit loop when condition load is not "
- "guaranteed to execute",
- "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+ std::optional<LoadInst *> EELoad;
+ if (HasStore) {
+ // Record load for analysis by isDereferenceableAndAlignedInLoop
+ // and later by dependence analysis.
+ if (BranchInst *Br = dyn_cast<BranchInst>(SingleUncountableEdge->first->getTerminator())) {
+ // FIXME: Handle exit conditions with multiple users, more complex exit
+ // conditions than br(icmp(load, loop_inv)).
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+ if (Cmp && Cmp->hasOneUse() &&
+ TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+ LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+ if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
+ if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
+ &Predicates)) {
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(TheLoop);
+ // FIXME: We may have multiple levels of conditional loads, so will
+ // need to improve on outright rejection at some point.
+ if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+ EELoad = Load;
+ } else {
+ LLVM_DEBUG(
+ dbgs() << "Early exit condition load not guaranteed to execute.\n");
+ reportVectorizationFailure(
+ "Early exit condition load not guaranteed to execute",
+ "Cannot vectorize early exit loop when condition load is not "
+ "guaranteed to execute",
+ "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
+ reportVectorizationFailure("Uncounted loop condition not known safe",
+ "Cannot vectorize early exit loop with "
+ "possibly unsafe condition load",
+ "PotentiallyFaultingEarlyExitLoop", ORE,
+ TheLoop);
+ return false;
+ }
+ }
}
- } else {
- LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
- reportVectorizationFailure("Uncounted loop condition not known safe",
- "Cannot vectorize early exit loop with "
- "possibly unsafe condition load",
- "PotentiallyFaultingEarlyExitLoop", ORE,
- TheLoop);
+ }
+
+ if (!EELoad.has_value()) {
+ LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
+ reportVectorizationFailure(
+ "Early exit loop with store but no condition load",
+ "Cannot vectorize early exit loop with store but no condition load",
+ "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
return false;
}
- } else if (HasStore) {
- LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
- reportVectorizationFailure(
- "Early exit loop with store but no condition load",
- "Cannot vectorize early exit loop with store but no condition load",
- "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
- return false;
} else {
// Read-only loop.
// FIXME: as with the loops with stores, only the loads contributing to
>From de34099d3571292539e206b83227a4a2b1ff1af3 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 14 May 2025 16:27:55 +0000
Subject: [PATCH 4/4] Improve memory vectorization checks
---
.../Vectorize/LoopVectorizationLegality.h | 4 +-
.../Vectorize/LoopVectorizationLegality.cpp | 53 +++++++++----------
2 files changed, 27 insertions(+), 30 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index c98b50702ed66..1dae09a5ec61d 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -408,9 +408,7 @@ class LoopVectorizationLegality {
}
/// Returns true if this is an early exit loop containing a store.
- bool isConditionCopyRequired() const {
- return EarlyExitLoad.has_value();
- }
+ bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); }
/// Returns the load instruction, if any, nearest to an uncountable early
/// exit.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7fae9ef0f5828..6513069d07454 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1210,16 +1210,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
});
}
- // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
- // since we're (potentially) doing the load out of its normal order
- // in the loop and that may throw off dependency checking.
- // A forward dependency should be fine, but a backwards dep may not
- // be even if LAA thinks it is due to performing the load for the
- // vector iteration i+1 in vector iteration i.
- if (isConditionCopyRequired()) {
- assert(EarlyExitLoad.has_value() && "EE Store without condition load.");
-
- if (LAI->canVectorizeMemory()) {
+ if (LAI->canVectorizeMemory()) {
+ // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+ // since we're (potentially) doing the load out of its normal order
+ // in the loop and that may throw off dependency checking.
+ // A forward dependency should be fine, but a backwards dep may not
+ // be even if LAA thinks it is due to performing the load for the
+ // vector iteration i+1 in vector iteration i.
+ if (isConditionCopyRequired()) {
const MemoryDepChecker &DepChecker = LAI->getDepChecker();
const auto *Deps = DepChecker.getDependences();
@@ -1238,9 +1236,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
}
- }
-
- if (!LAI->canVectorizeMemory())
+ } else if (!isConditionCopyRequired())
return canVectorizeIndirectUnsafeDependences();
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
@@ -1783,7 +1779,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
if (HasStore) {
// Record load for analysis by isDereferenceableAndAlignedInLoop
// and later by dependence analysis.
- if (BranchInst *Br = dyn_cast<BranchInst>(SingleUncountableEdge->first->getTerminator())) {
+ if (BranchInst *Br = dyn_cast<BranchInst>(
+ SingleUncountableEdge->first->getTerminator())) {
// FIXME: Handle exit conditions with multiple users, more complex exit
// conditions than br(icmp(load, loop_inv)).
ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
@@ -1791,8 +1788,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
- if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
- &Predicates)) {
+ if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
+ *DT, AC, &Predicates)) {
ICFLoopSafetyInfo SafetyInfo;
SafetyInfo.computeLoopSafetyInfo(TheLoop);
// FIXME: We may have multiple levels of conditional loads, so will
@@ -1801,20 +1798,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
EELoad = Load;
} else {
LLVM_DEBUG(
- dbgs() << "Early exit condition load not guaranteed to execute.\n");
+ dbgs()
+ << "Early exit condition load not guaranteed to execute.\n");
reportVectorizationFailure(
- "Early exit condition load not guaranteed to execute",
- "Cannot vectorize early exit loop when condition load is not "
- "guaranteed to execute",
- "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+ "Early exit condition load not guaranteed to execute",
+ "Cannot vectorize early exit loop when condition load is not "
+ "guaranteed to execute",
+ "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
}
} else {
- LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
- reportVectorizationFailure("Uncounted loop condition not known safe",
- "Cannot vectorize early exit loop with "
- "possibly unsafe condition load",
- "PotentiallyFaultingEarlyExitLoop", ORE,
- TheLoop);
+ LLVM_DEBUG(dbgs()
+ << "Early exit condition load potentially unsafe.\n");
+ reportVectorizationFailure(
+ "Uncounted loop condition not known safe",
+ "Cannot vectorize early exit loop with "
+ "possibly unsafe condition load",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
return false;
}
}
More information about the llvm-commits
mailing list