[llvm] [LV] Initial support for stores in early exit loops (PR #137774)

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Wed May 21 04:02:52 PDT 2025


https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/137774

>From 620f8797ebd6ea9a2700d20cf01106a3e3b201f2 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 4 Apr 2025 15:44:52 +0000
Subject: [PATCH 1/4] [LV] Initial support for stores in early exit loops

Adds some basic support for a simple early exit loop with a store.

This is vectorized such that when the next vector iteration would
exit, we bail out to the scalar loop to handle the exit.
---
 .../Vectorize/LoopVectorizationLegality.h     |  19 ++
 .../Vectorize/LoopVectorizationLegality.cpp   | 125 ++++++++++-
 .../Transforms/Vectorize/LoopVectorize.cpp    |  23 +-
 llvm/lib/Transforms/Vectorize/VPlan.h         |  17 ++
 .../Transforms/Vectorize/VPlanPatternMatch.h  |  44 ++++
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |   4 +-
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 197 ++++++++++++++----
 .../Transforms/Vectorize/VPlanTransforms.h    |   2 +
 llvm/lib/Transforms/Vectorize/VPlanValue.h    |   2 +
 .../Transforms/Vectorize/VPlanVerifier.cpp    |   4 +-
 .../AArch64/simple_early_exit.ll              |  78 +++++++
 .../Transforms/LoopVectorize/control-flow.ll  |   2 +-
 .../LoopVectorize/early_exit_legality.ll      | 188 ++++++++++++++++-
 13 files changed, 652 insertions(+), 53 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..1c153a203d4ec 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -407,6 +407,15 @@ class LoopVectorizationLegality {
     return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr;
   }
 
+  /// Returns true if this is an early exit loop containing a store.
+  bool isConditionCopyRequired() const {
+    return RequiresEarlyExitConditionCopy;
+  }
+
+  /// Returns the load instruction, if any, nearest to an uncountable early
+  /// exit.
+  std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
+
   /// Return true if there is store-load forwarding dependencies.
   bool isSafeForAnyStoreLoadForwardDistances() const {
     return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -654,6 +663,16 @@ class LoopVectorizationLegality {
   /// Keep track of the loop edge to an uncountable exit, comprising a pair
   /// of (Exiting, Exit) blocks, if there is exactly one early exit.
   std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+
+  /// Indicates that we will need to copy the early exit condition into
+  /// the vector preheader, as we will need to mask some operations in
+  /// the loop (e.g. stores).
+  bool RequiresEarlyExitConditionCopy = false;
+
+  /// The load used to determine an uncountable early-exit condition. This is
+  /// only used to allow further analysis in canVectorizeMemory if we found
+  /// what looks like a valid early exit loop with store beforehand.
+  std::optional<LoadInst *> EarlyExitLoad;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 8e09e6f8d4935..71a1770cf7c75 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1209,6 +1210,36 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     });
   }
 
+  // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+  //        since we're (potentially) doing the load out of its normal order
+  //        in the loop and that may throw off dependency checking.
+  //        A forward dependency should be fine, but a backwards dependency
+  //        may be unsafe even if LAA reports it as safe, because the load
+  //        for vector iteration i+1 is performed in vector iteration i.
+  if (isConditionCopyRequired()) {
+    assert(EarlyExitLoad.has_value() && "EE Store without condition load.");
+
+    if (LAI->canVectorizeMemory()) {
+      const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+      const auto *Deps = DepChecker.getDependences();
+
+      for (const MemoryDepChecker::Dependence &Dep : *Deps) {
+        if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
+            Dep.getSource(DepChecker) == EarlyExitLoad) {
+          // FIXME: Refine the wording; this restriction currently only applies
+          // when a store is present in the early exit loop.
+          reportVectorizationFailure(
+              "No dependencies allowed for early exit condition load",
+              "Early exit condition loads may not have a dependence with "
+              "another"
+              " memory operation.",
+              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+          return false;
+        }
+      }
+    }
+  }
+
   if (!LAI->canVectorizeMemory())
     return canVectorizeIndirectUnsafeDependences();
 
@@ -1627,6 +1658,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   // Keep a record of all the exiting blocks.
   SmallVector<const SCEVPredicate *, 4> Predicates;
   std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
+  std::optional<LoadInst *> EELoad;
   for (BasicBlock *BB : ExitingBlocks) {
     const SCEV *EC =
         PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
@@ -1656,6 +1688,21 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
         return false;
       }
 
+      // For loops with stores, record the load feeding the exit condition
+      // so that it can be checked by isDereferenceableAndAlignedInLoop and
+      // later by dependence analysis.
+      if (BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator())) {
+        // FIXME: Handle exit conditions with multiple users, more complex exit
+        //        conditions than br(icmp(load, loop_inv)).
+        ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+        if (Cmp && Cmp->hasOneUse() &&
+            TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+          LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+          if (Load && Load->hasOneUse() && TheLoop->contains(Load))
+            EELoad = Load;
+        }
+      }
+
       SingleUncountableEdge = {BB, ExitBlock};
     } else
       CountableExitingBlocks.push_back(BB);
@@ -1708,16 +1755,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
+  bool HasStore = false;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        HasStore = true;
+        if (SI->isSimple())
+          continue;
+
+        reportVectorizationFailure(
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
+            "WritesInEarlyExitLoop", ORE, TheLoop);
+        return false;
+      }
+
       if (I.mayWriteToMemory()) {
         // We don't support writes to memory.
         reportVectorizationFailure(
-            "Writes to memory unsupported in early exit loops",
-            "Cannot vectorize early exit loop with writes to memory",
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
             "WritesInEarlyExitLoop", ORE, TheLoop);
         return false;
-      } else if (!IsSafeOperation(&I)) {
+      }
+
+      if (!IsSafeOperation(&I)) {
         reportVectorizationFailure("Early exit loop contains operations that "
                                    "cannot be speculatively executed",
                                    "UnsafeOperationsEarlyExitLoop", ORE,
@@ -1732,13 +1794,53 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
 
   // TODO: Handle loops that may fault.
   Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
+
+  if (HasStore && EELoad.has_value()) {
+    LoadInst *LI = *EELoad;
+    if (isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(), *DT, AC,
+                                          &Predicates)) {
+      ICFLoopSafetyInfo SafetyInfo;
+      SafetyInfo.computeLoopSafetyInfo(TheLoop);
+      // FIXME: Handle multiple levels of conditional loads; don't just reject.
+      if (!SafetyInfo.isGuaranteedToExecute(*LI, DT, TheLoop)) {
+        LLVM_DEBUG(
+            dbgs() << "Early exit condition load not guaranteed to execute.\n");
+        reportVectorizationFailure(
+            "Early exit condition load not guaranteed to execute",
+            "Cannot vectorize early exit loop when condition load is not "
+            "guaranteed to execute",
+            "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+        return false;
+      }
+    } else {
+      LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
+      reportVectorizationFailure("Uncounted loop condition not known safe",
+                                 "Cannot vectorize early exit loop with "
+                                 "possibly unsafe condition load",
+                                 "PotentiallyFaultingEarlyExitLoop", ORE,
+                                 TheLoop);
+      return false;
+    }
+  } else if (HasStore) {
+    LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
     reportVectorizationFailure(
-        "Loop may fault",
-        "Cannot vectorize potentially faulting early exit loop",
-        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+        "Early exit loop with store but no condition load",
+        "Cannot vectorize early exit loop with store but no condition load",
+        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
     return false;
+  } else {
+    // Read-only loop.
+    // FIXME: as with the loops with stores, only the loads contributing to
+    //        the loop condition need to be guaranteed dereferenceable and
+    //        aligned.
+    if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+                                       &Predicates)) {
+      reportVectorizationFailure(
+          "Loop may fault",
+          "Cannot vectorize potentially faulting early exit loop",
+          "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
   }
 
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
@@ -1751,6 +1853,11 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                        "backedge taken count: "
                     << *SymbolicMaxBTC << '\n');
   UncountableEdge = SingleUncountableEdge;
+  if (HasStore) {
+    RequiresEarlyExitConditionCopy = true;
+    EarlyExitLoad = EELoad;
+  }
+
   return true;
 }
 
@@ -1823,6 +1930,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     } else {
       if (!isVectorizableEarlyExitLoop()) {
         UncountableEdge = std::nullopt;
+        EarlyExitLoad = std::nullopt;
+        RequiresEarlyExitConditionCopy = false;
         if (DoExtraAnalysis)
           Result = false;
         else
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7ad02956a5b69..dd734b231fb6c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2530,8 +2530,10 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
 static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
   VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
   for (auto &R : make_early_inc_range(*VPBB)) {
-    assert(!R.isPhi() && "Tried to move phi recipe to end of block");
-    R.moveBefore(*IRVPBB, IRVPBB->end());
+    if (R.isPhi())
+      R.moveBefore(*IRVPBB, IRVPBB->getFirstNonPhi());
+    else
+      R.moveBefore(*IRVPBB, IRVPBB->end());
   }
 
   VPBlockUtils::reassociateBlocks(VPBB, IRVPBB);
@@ -9100,6 +9102,15 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
         VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths,
                                  *Plan, CM.getMinimalBitwidths());
       VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+
+      // See if we can convert an early exit vplan to bail out to a scalar
+      // loop if state-changing operations (like stores) are present and
+      // an exit will be taken in the next vector iteration.
+      // If not, discard the plan.
+      if (Legal->isConditionCopyRequired() && !HasScalarVF &&
+          !VPlanTransforms::runPass(VPlanTransforms::tryEarlyExitConversion,
+                                    *Plan))
+        break;
       // TODO: try to put it close to addActiveLaneMask().
       // Discard the plan if it is not EVL-compatible
       if (CM.foldTailWithEVL() && !HasScalarVF &&
@@ -9380,6 +9391,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
           Range);
   DenseMap<const VPBlockBase *, BasicBlock *> VPB2IRBB;
   auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+  // FIXME: Better place to put this? Or maybe an enum for how to handle
+  //        early exits?
+  if (Legal->hasUncountableEarlyExit())
+    Plan->setEarlyExitContinuesInScalarLoop(Legal->isConditionCopyRequired());
   VPlanTransforms::prepareForVectorization(
       *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
       CM.foldTailByMasking(), OrigLoop,
@@ -9681,6 +9696,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
 
   DenseMap<const VPBlockBase *, BasicBlock *> VPB2IRBB;
   auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+  // FIXME: Better place to put this? Or maybe an enum for how to handle
+  //        early exits?
+  if (Legal->hasUncountableEarlyExit())
+    Plan->setEarlyExitContinuesInScalarLoop(Legal->isConditionCopyRequired());
   VPlanTransforms::prepareForVectorization(
       *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop,
       getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()), false,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 2c4cac7655ec9..2b7ab5a4254dd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3636,6 +3636,13 @@ class VPlan {
   /// VPlan is destroyed.
   SmallVector<VPBlockBase *> CreatedBlocks;
 
+  /// Indicates that the vector loop exits before the vector iteration in which
+  /// the early exit is taken; the scalar loop runs the remaining iterations.
+  /// FIXME: Is this the right place? We mainly want to make sure that we
+  ///        know about this for transforming the plan to copy&move the exit
+  ///        condition, but maybe it doesn't need to be in the plan itself.
+  bool EarlyExitContinuesInScalarLoop = false;
+
   /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
   /// wrapping the original header of the scalar loop.
   VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
@@ -3939,6 +3946,16 @@ class VPlan {
     return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1;
   }
 
+  /// Returns true if all exit paths should reach the scalar loop.
+  bool shouldEarlyExitContinueInScalarLoop() const {
+    return EarlyExitContinuesInScalarLoop;
+  }
+
+  /// Set early exit vectorization to always reach the scalar loop.
+  void setEarlyExitContinuesInScalarLoop(bool Continues) {
+    EarlyExitContinuesInScalarLoop = Continues;
+  }
+
   /// Returns true if the scalar tail may execute after the vector loop. Note
   /// that this relies on unneeded branches to the scalar tail loop being
   /// removed.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f2a7f16e19a79..3b3e2415d7b00 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -41,6 +41,17 @@ template <typename Class> struct class_match {
 /// Match an arbitrary VPValue and ignore it.
 inline class_match<VPValue> m_VPValue() { return class_match<VPValue>(); }
 
+struct loop_invariant_vpvalue {
+  template <typename ITy> bool match(ITy *V) const {
+    VPValue *Val = dyn_cast<VPValue>(V);
+    return Val && Val->isDefinedOutsideLoopRegions();
+  }
+};
+
+inline loop_invariant_vpvalue m_LoopInvVPValue() {
+  return loop_invariant_vpvalue();
+}
+
 template <typename Class> struct bind_ty {
   Class *&VR;
 
@@ -324,6 +335,12 @@ m_Not(const Op0_t &Op0) {
   return m_VPInstruction<VPInstruction::Not>(Op0);
 }
 
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
 template <typename Op0_t>
 inline UnaryVPInstruction_match<Op0_t, VPInstruction::BranchOnCond>
 m_BranchOnCond(const Op0_t &Op0) {
@@ -431,6 +448,19 @@ inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
   return GEPLikeRecipe_match<Op0_t, Op1_t>(Op0, Op1);
 }
 
+// FIXME: Separate Commutative matcher? Share result type?
+// FIXME: Are there other recipe types for ICmp?
+template <typename Op0_t, typename Op1_t>
+using ICmpRecipe_match =
+    BinaryRecipe_match<Op0_t, Op1_t, Instruction::ICmp, false, VPWidenRecipe,
+                       VPReplicateRecipe>;
+
+template <typename Op0_t, typename Op1_t>
+inline ICmpRecipe_match<Op0_t, Op1_t> m_ICmp(const Op0_t &Op0,
+                                             const Op1_t &Op1) {
+  return ICmpRecipe_match<Op0_t, Op1_t>(Op0, Op1);
+}
+
 template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
 using AllTernaryRecipe_match =
     Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,
@@ -581,6 +611,20 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
   return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
 }
 
+template <typename SubPattern_t> struct OneUse_match {
+  SubPattern_t SubPattern;
+
+  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    return V->hasOneUse() && SubPattern.match(V);
+  }
+};
+
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
+  return SubPattern;
+}
+
 } // namespace VPlanPatternMatch
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3c7ab7d24bf6d..f8b1ff5d77ec7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -276,7 +276,9 @@ InstructionCost VPRecipeBase::computeCost(ElementCount VF,
 bool VPRecipeBase::isPhi() const {
   return (getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC) ||
          (isa<VPInstruction>(this) &&
-          cast<VPInstruction>(this)->getOpcode() == Instruction::PHI) ||
+          (cast<VPInstruction>(this)->getOpcode() == Instruction::PHI ||
+           cast<VPInstruction>(this)->getOpcode() ==
+               VPInstruction::ResumePhi)) ||
          isa<VPIRPhi>(this);
 }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 806c20ef8cf73..93016da928a54 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2493,48 +2493,52 @@ void VPlanTransforms::handleUncountableEarlyExit(
   // block if CondToEarlyExit.
   VPValue *IsEarlyExitTaken =
       Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
-  VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
-  VPBasicBlock *VectorEarlyExitVPBB =
-      Plan.createVPBasicBlock("vector.early.exit");
-  VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
-  VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
-  NewMiddle->swapSuccessors();
-
-  VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
-
-  // Update the exit phis in the early exit block.
-  VPBuilder MiddleBuilder(NewMiddle);
-  VPBuilder EarlyExitB(VectorEarlyExitVPBB);
-  for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
-    auto *ExitIRI = cast<VPIRPhi>(&R);
-    // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
-    // a single predecessor and 1 if it has two.
-    unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
-    if (ExitIRI->getNumOperands() != 1) {
-      // The first of two operands corresponds to the latch exit, via MiddleVPBB
-      // predecessor. Extract its last lane.
-      ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
-    }
 
-    VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
-    auto IsVector = [](ElementCount VF) { return VF.isVector(); };
-    // When the VFs are vectors, need to add `extract` to get the incoming value
-    // from early exit. When the range contains scalar VF, limit the range to
-    // scalar VF to prevent mis-compilation for the range containing both scalar
-    // and vector VFs.
-    if (!IncomingFromEarlyExit->isLiveIn() &&
-        LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
-      // Update the incoming value from the early exit.
-      VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
-          VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
-          "first.active.lane");
-      IncomingFromEarlyExit = EarlyExitB.createNaryOp(
-          Instruction::ExtractElement, {IncomingFromEarlyExit, FirstActiveLane},
-          nullptr, "early.exit.value");
-      ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+  if (!Plan.shouldEarlyExitContinueInScalarLoop()) {
+    VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
+    VPBasicBlock *VectorEarlyExitVPBB =
+        Plan.createVPBasicBlock("vector.early.exit");
+    VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
+    VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
+    NewMiddle->swapSuccessors();
+
+    VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
+
+    // Update the exit phis in the early exit block.
+    VPBuilder MiddleBuilder(NewMiddle);
+    VPBuilder EarlyExitB(VectorEarlyExitVPBB);
+    for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+      auto *ExitIRI = cast<VPIRPhi>(&R);
+      // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
+      // a single predecessor and 1 if it has two.
+      unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
+      if (ExitIRI->getNumOperands() != 1) {
+        // The first of two operands corresponds to the latch exit, via
+        // MiddleVPBB predecessor. Extract its last lane.
+        ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
+      }
+
+      VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
+      auto IsVector = [](ElementCount VF) { return VF.isVector(); };
+      // When the VFs are vectors, need to add `extract` to get the incoming
+      // value from early exit. When the range contains scalar VF, limit the
+      // range to scalar VF to prevent mis-compilation for the range containing
+      // both scalar and vector VFs.
+      if (!IncomingFromEarlyExit->isLiveIn() &&
+          LoopVectorizationPlanner::getDecisionAndClampRange(IsVector, Range)) {
+        // Update the incoming value from the early exit.
+        VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
+            VPInstruction::FirstActiveLane, {CondToEarlyExit}, nullptr,
+            "first.active.lane");
+        IncomingFromEarlyExit =
+            EarlyExitB.createNaryOp(Instruction::ExtractElement,
+                                    {IncomingFromEarlyExit, FirstActiveLane},
+                                    nullptr, "early.exit.value");
+        ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
+      }
     }
+    MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
   }
-  MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});
 
   // Replace the condition controlling the non-early exit from the vector loop
   // with one exiting if either the original condition of the vector latch is
@@ -2551,6 +2555,121 @@ void VPlanTransforms::handleUncountableEarlyExit(
   LatchExitingBranch->eraseFromParent();
 }
 
+bool VPlanTransforms::tryEarlyExitConversion(VPlan &Plan) {
+  // We can abandon a vplan entirely if we return false here, so we shouldn't
+  // crash if some earlier assumptions on scalar IR don't hold for the vplan
+  // version of the loop.
+  if (Plan.hasScalarVFOnly())
+    return false;
+  auto *Region = Plan.getVectorLoopRegion();
+  using namespace llvm::VPlanPatternMatch;
+  VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+
+  // Find the uncounted loop exit condition.
+  VPValue *Uncounted = nullptr;
+  if (!match(Region->getExitingBasicBlock()->getTerminator(),
+             m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+                 m_OneUse(m_AnyOf(m_VPValue(Uncounted))), m_VPValue())))))
+    return false;
+
+  // FIXME: Copy while scanning through IR; no need to save into a list, and
+  //        we avoid problems with cloning differing recipe types.
+
+  // Extract the IR needed to create the uncountable exit condition.
+  // Looking for br(or(any_of(icmp(load(gep(base, iv)), loop_inv)), counted))
+  // FIXME: Build a list of nodes to copy below instead of matching
+  //        the exact pattern.
+  // FIXME: We should be able to handle multiple users for at least some of
+  //        these nodes; requires creating phis.
+  // FIXME: This does feel a bit fragile; is it better to do this earlier
+  //        when creating the initial recipe based on the scalar IR, instead
+  //        of the vplan equivalent here?
+  // FIXME: New vplan pattern matchers; m_Load, m_ICmp, m_OneUse, etc.
+  auto *Cmp = dyn_cast<VPWidenRecipe>(Uncounted);
+  if (!Cmp || !Cmp->hasOneUse() || Cmp->getOpcode() != Instruction::ICmp ||
+      !Cmp->getOperand(1)->isDefinedOutsideLoopRegions())
+    return false;
+  auto *Load = dyn_cast<VPWidenLoadRecipe>(Cmp->getOperand(0));
+  if (!Load || !Load->hasOneUse() || !Load->isConsecutive())
+    return false;
+  auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Load->getAddr());
+  if (!VecPtr || !VecPtr->hasOneUse())
+    return false;
+
+  VPReplicateRecipe *GEP = dyn_cast<VPReplicateRecipe>(VecPtr->getOperand(0));
+  if (!GEP || !match(GEP, m_GetElementPtr(
+                              m_LoopInvVPValue(),
+                              m_ScalarIVSteps(m_Specific(IV), m_SpecificInt(1),
+                                              m_Specific(&Plan.getVF())))))
+    return false;
+
+  VPInstruction *IVUpdate = dyn_cast<VPInstruction>(IV->getBackedgeValue());
+  if (!IVUpdate)
+    return false;
+
+  // Duplicate exit IR and use the starting value for the IV phi.
+  auto *VectorPH = Plan.getVectorPreheader();
+  VPBuilder PHBuilder(VectorPH, VectorPH->getFirstNonPhi());
+  VPReplicateRecipe *PHGEP = GEP->clone();
+  PHGEP->setOperand(1, IV->getStartValue());
+  PHBuilder.insert(PHGEP);
+  VPVectorPointerRecipe *PHVecPtr = VecPtr->clone();
+  PHVecPtr->setOperand(0, PHGEP);
+  PHBuilder.insert(PHVecPtr);
+  VPWidenLoadRecipe *PHLoad = Load->clone();
+  PHLoad->setOperand(0, PHVecPtr);
+  PHBuilder.insert(PHLoad);
+  VPWidenRecipe *PHCmp = Cmp->clone();
+  PHCmp->setOperand(0, PHLoad);
+  PHBuilder.insert(PHCmp);
+
+  // Split vector preheader to form a new bypass block.
+  VPBasicBlock *NewPH = VectorPH->splitAt(PHBuilder.getInsertPoint());
+  VPBasicBlock *ScalarPH = Plan.getScalarPreheader();
+  VPValue *PHAnyOf = PHBuilder.createNaryOp(VPInstruction::AnyOf, {PHCmp});
+  PHBuilder.createNaryOp(VPInstruction::BranchOnCond, {PHAnyOf},
+                         PHCmp->getDebugLoc());
+  VectorPH->clearSuccessors();
+  VectorPH->setTwoSuccessors(ScalarPH, NewPH);
+
+  // Fix up the resume phi in scalar preheader -- we might not have reached
+  // the calculated maximum vector tripcount, so just use the next value of IV.
+  // FIXME: Can we rely on the resume phi being first?
+  //        Might need a map. For now, grab the phis in the block and abandon
+  //        if there's more than one.
+  VPInstruction *ResumePHI = nullptr;
+  for (VPRecipeBase &PHI : ScalarPH->phis()) {
+    if (ResumePHI)
+      return false;
+    ResumePHI = dyn_cast<VPInstruction>(&PHI);
+  }
+  if (!ResumePHI || ResumePHI->getOpcode() != VPInstruction::ResumePhi)
+    return false;
+  VPBasicBlock *MiddleBlock = Plan.getMiddleBlock();
+  ScalarPH->clearPredecessors();
+  ScalarPH->setPredecessors({MiddleBlock, VectorPH});
+  ResumePHI->addOperand(ResumePHI->getOperand(1));
+  ResumePHI->setOperand(0, IVUpdate);
+
+  // Move the IV update, if necessary (and safe), then update the index
+  // operand of the GEP so that we load the next vector iteration's exit
+  // condition data.
+  VPDominatorTree VPDT;
+  VPDT.recalculate(Plan);
+  if (!VPDT.properlyDominates(IVUpdate, GEP))
+    IVUpdate->moveBefore(*GEP->getParent(), GEP->getIterator());
+  GEP->setOperand(1, IVUpdate);
+
+  // Update middle block branch to use IVUpdate vs. the full trip count,
+  // since we may be exiting the vector loop early.
+  VPRecipeBase *OldTerminator = MiddleBlock->getTerminator();
+  VPBuilder MBBuilder(OldTerminator);
+  VPValue *FullTC =
+      MBBuilder.createICmp(CmpInst::ICMP_EQ, IVUpdate, Plan.getTripCount());
+  OldTerminator->setOperand(0, FullTC);
+  return true;
+}
+
 void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
   for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
     auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index d284d916633c8..18841aa2055f9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -162,6 +162,8 @@ struct VPlanTransforms {
   tryAddExplicitVectorLength(VPlan &Plan,
                              const std::optional<unsigned> &MaxEVLSafeElements);
 
+  static bool tryEarlyExitConversion(VPlan &Plan);
+
   // For each Interleave Group in \p InterleaveGroups replace the Recipes
   // widening its memory instructions with a single VPInterleaveRecipe at its
   // insertion point.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 638156eab7a84..7805bdf20e6d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -152,6 +152,8 @@ class VPValue {
     return Current != user_end();
   }
 
+  bool hasOneUse() const { return Users.size() == 1; }
+
   void replaceAllUsesWith(VPValue *New);
 
   /// Go through the uses list for this VPValue and make each use point to \p
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index b8205545a4f5e..bf4559b758223 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -225,7 +225,9 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
             isa<VPHeaderPHIRecipe, VPWidenPHIRecipe, VPPredInstPHIRecipe,
                 VPIRPhi>(UI) ||
             (isa<VPInstruction>(UI) &&
-             cast<VPInstruction>(UI)->getOpcode() == Instruction::PHI))
+             (cast<VPInstruction>(UI)->getOpcode() == Instruction::PHI ||
+              cast<VPInstruction>(UI)->getOpcode() ==
+                  VPInstruction::ResumePhi)))
           continue;
 
         // If the user is in the same block, check it comes after R in the
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
index 74b0c2c0e033a..1e9b55d9e560d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll
@@ -575,6 +575,82 @@ loop.end:
   ret i64 %retval
 }
 
+define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: define void @loop_contains_store_single_user(
+; CHECK-SAME: ptr noalias dereferenceable(40) [[ARRAY:%.*]], ptr readonly align 2 dereferenceable(40) [[PRED:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], splat (i16 500)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH]], label [[VECTOR_PH_SPLIT:%.*]]
+; CHECK:       vector.ph.split:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_SPLIT]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i16, ptr [[ARRAY]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i16> [[WIDE_LOAD1]], splat (i16 1)
+; CHECK-NEXT:    store <4 x i16> [[TMP6]], ptr [[TMP5]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 [[INDEX_NEXT]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP7]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD2]], splat (i16 500)
+; CHECK-NEXT:    [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
+; CHECK-NEXT:    [[TMP12:%.*]] = or i1 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
+; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_PH]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[ST_ADDR:%.*]] = getelementptr inbounds nuw i16, ptr [[ARRAY]], i64 [[IV]]
+; CHECK-NEXT:    [[DATA:%.*]] = load i16, ptr [[ST_ADDR]], align 2
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i16 [[DATA]], 1
+; CHECK-NEXT:    store i16 [[INC]], ptr [[ST_ADDR]], align 2
+; CHECK-NEXT:    [[EE_ADDR:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 [[IV]]
+; CHECK-NEXT:    [[EE_VAL:%.*]] = load i16, ptr [[EE_ADDR]], align 2
+; CHECK-NEXT:    [[EE_COND:%.*]] = icmp sgt i16 [[EE_VAL]], 500
+; CHECK-NEXT:    br i1 [[EE_COND]], label [[EXIT]], label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COUNTED_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 20
+; CHECK-NEXT:    br i1 [[COUNTED_COND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare i32 @foo(i32) readonly
 declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>)
 
@@ -595,4 +671,6 @@ attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
 ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
 ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
+; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
+; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..2578260fe878d 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
 ;   return 0;
 ; }
 
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
 ; CHECK: remark: source.cpp:5:9: loop not vectorized
 
 ; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index de455c81d363e..6eb9fc2adeb70 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -445,7 +445,7 @@ loop.end:
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   %p1 = alloca [1024 x i8]
   call void @init_mem(ptr %p1, i64 1024)
@@ -470,6 +470,192 @@ loop.end:
   ret i64 %retval
 }
 
+define void @loop_contains_store_single_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_single_user'
+; CHECK:       LV: We can vectorize this loop!
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_multi_user(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_multi_user'
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  %unused = add i16 %ee.val, 42
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_fcmp(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp'
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw half, ptr %pred, i64 %iv
+  %ee.val = load half, ptr %ee.addr, align 2
+  %ee.cond = fcmp ugt half %ee.val, 500.0
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
+; CHECK:       LV: Not vectorizing: No dependencies allowed for early exit condition load.
+entry:
+  %forward = getelementptr i16, ptr %pred, i64 -8
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  %some.addr = getelementptr inbounds nuw i16, ptr %forward, i64 %iv
+  store i16 42, ptr %some.addr, align 2
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
+entry:
+  %n_bytes = mul nuw nsw i32 %n, 2
+  call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
+  %tc = sext i32 %n to i64
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, %tc
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
+; CHECK:       LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store volatile i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @exit_conditions_combined(ptr noalias dereferenceable(40) %array, ptr readonly align 2 dereferenceable(40) %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'exit_conditions_combined'
+; CHECK:       LV: Not vectorizing: Cannot vectorize uncountable loop.
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  %or.cond = select i1 %ee.cond, i1 true, i1 %counted.cond
+  br i1 %or.cond, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body
+  ret void
+}
 
 define i64 @uncountable_exit_in_conditional_block(ptr %mask) {
 ; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block'

>From d1e60620af11335592f065b1b337234ea665bafe Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 14 May 2025 15:06:39 +0000
Subject: [PATCH 2/4] Remove redundant variable, add helper to clear ee state,
 change load check to !invariant

---
 .../Vectorize/LoopVectorizationLegality.h       | 17 +++++++++++------
 .../Vectorize/LoopVectorizationLegality.cpp     | 10 +++-------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 1c153a203d4ec..c98b50702ed66 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -409,7 +409,7 @@ class LoopVectorizationLegality {
 
   /// Returns true if this is an early exit loop containing a store.
   bool isConditionCopyRequired() const {
-    return RequiresEarlyExitConditionCopy;
+    return EarlyExitLoad.has_value();
   }
 
   /// Returns the load instruction, if any, nearest to an uncountable early
@@ -545,6 +545,12 @@ class LoopVectorizationLegality {
   /// additional cases safely.
   bool isVectorizableEarlyExitLoop();
 
+  /// Clears any early exit data gathered so far; called when a check fails.
+  void clearEarlyExitData() {
+    UncountableEdge = std::nullopt;
+    EarlyExitLoad = std::nullopt;
+  }
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -664,14 +670,13 @@ class LoopVectorizationLegality {
   /// of (Exiting, Exit) blocks, if there is exactly one early exit.
   std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
 
-  /// Indicates that we will need to copy the early exit condition into
-  /// the vector preheader, as we will need to mask some operations in
-  /// the loop (e.g. stores).
-  bool RequiresEarlyExitConditionCopy = false;
-
   /// The load used to determine an uncountable early-exit condition. This is
   /// only used to allow further analysis in canVectorizeMemory if we found
   /// what looks like a valid early exit loop with store beforehand.
+  ///
+  /// Also indicates that we will need to copy the early exit condition into
+  /// the vector preheader, as we will need to mask some operations in
+  /// the loop (e.g. stores) or bail out to a scalar loop.
   std::optional<LoadInst *> EarlyExitLoad;
 };
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 71a1770cf7c75..4b2d341f910ec 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1698,7 +1698,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
         if (Cmp && Cmp->hasOneUse() &&
             TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
           LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
-          if (Load && Load->hasOneUse() && TheLoop->contains(Load))
+          if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load))
             EELoad = Load;
         }
       }
@@ -1853,10 +1853,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                        "backedge taken count: "
                     << *SymbolicMaxBTC << '\n');
   UncountableEdge = SingleUncountableEdge;
-  if (HasStore) {
-    RequiresEarlyExitConditionCopy = true;
+  if (HasStore)
     EarlyExitLoad = EELoad;
-  }
 
   return true;
 }
@@ -1929,9 +1927,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
         return false;
     } else {
       if (!isVectorizableEarlyExitLoop()) {
-        UncountableEdge = std::nullopt;
-        EarlyExitLoad = std::nullopt;
-        RequiresEarlyExitConditionCopy = false;
+        clearEarlyExitData();
         if (DoExtraAnalysis)
           Result = false;
         else

>From 5ea9208a2257be448658554b391fc0175e2c1d5b Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 14 May 2025 15:43:49 +0000
Subject: [PATCH 3/4] Move exit load checks after store detection

---
 .../Vectorize/LoopVectorizationLegality.cpp   | 93 ++++++++++---------
 1 file changed, 47 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 4b2d341f910ec..7fae9ef0f5828 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1658,7 +1658,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   // Keep a record of all the exiting blocks.
   SmallVector<const SCEVPredicate *, 4> Predicates;
   std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge;
-  std::optional<LoadInst *> EELoad;
   for (BasicBlock *BB : ExitingBlocks) {
     const SCEV *EC =
         PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates);
@@ -1688,21 +1687,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
         return false;
       }
 
-      // For loops with stores.
-      // Record load for analysis by isDereferenceableAndAlignedInLoop
-      // and later by dependence analysis.
-      if (BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator())) {
-        // FIXME: Handle exit conditions with multiple users, more complex exit
-        //        conditions than br(icmp(load, loop_inv)).
-        ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
-        if (Cmp && Cmp->hasOneUse() &&
-            TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
-          LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
-          if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load))
-            EELoad = Load;
-        }
-      }
-
       SingleUncountableEdge = {BB, ExitBlock};
     } else
       CountableExitingBlocks.push_back(BB);
@@ -1795,39 +1779,56 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   // TODO: Handle loops that may fault.
   Predicates.clear();
 
-  if (HasStore && EELoad.has_value()) {
-    LoadInst *LI = *EELoad;
-    if (isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(), *DT, AC,
-                                          &Predicates)) {
-      ICFLoopSafetyInfo SafetyInfo;
-      SafetyInfo.computeLoopSafetyInfo(TheLoop);
-      // FIXME: We may have multiple levels of conditional loads, so will
-      //        need to improve on outright rejection at some point.
-      if (!SafetyInfo.isGuaranteedToExecute(*LI, DT, TheLoop)) {
-        LLVM_DEBUG(
-            dbgs() << "Early exit condition load not guaranteed to execute.\n");
-        reportVectorizationFailure(
-            "Early exit condition load not guaranteed to execute",
-            "Cannot vectorize early exit loop when condition load is not "
-            "guaranteed to execute",
-            "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+  std::optional<LoadInst *> EELoad;
+  if (HasStore) {
+    // Record load for analysis by isDereferenceableAndAlignedInLoop
+    // and later by dependence analysis.
+    if (BranchInst *Br = dyn_cast<BranchInst>(SingleUncountableEdge->first->getTerminator())) {
+      // FIXME: Handle exit conditions with multiple users, more complex exit
+      //        conditions than br(icmp(load, loop_inv)).
+      ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+      if (Cmp && Cmp->hasOneUse() &&
+          TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+        LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+        if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
+          if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
+                                                &Predicates)) {
+            ICFLoopSafetyInfo SafetyInfo;
+            SafetyInfo.computeLoopSafetyInfo(TheLoop);
+            // FIXME: We may have multiple levels of conditional loads, so will
+            //        need to improve on outright rejection at some point.
+            if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+              EELoad = Load;
+            } else {
+              LLVM_DEBUG(
+              dbgs() << "Early exit condition load not guaranteed to execute.\n");
+              reportVectorizationFailure(
+              "Early exit condition load not guaranteed to execute",
+              "Cannot vectorize early exit loop when condition load is not "
+              "guaranteed to execute",
+              "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+            }
+          } else {
+            LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
+            reportVectorizationFailure("Uncounted loop condition not known safe",
+             "Cannot vectorize early exit loop with "
+             "possibly unsafe condition load",
+             "PotentiallyFaultingEarlyExitLoop", ORE,
+             TheLoop);
+            return false;
+          }
+        }
       }
-    } else {
-      LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
-      reportVectorizationFailure("Uncounted loop condition not known safe",
-                                 "Cannot vectorize early exit loop with "
-                                 "possibly unsafe condition load",
-                                 "PotentiallyFaultingEarlyExitLoop", ORE,
-                                 TheLoop);
+    }
+
+    if (!EELoad.has_value()) {
+      LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
+      reportVectorizationFailure(
+          "Early exit loop with store but no condition load",
+          "Cannot vectorize early exit loop with store but no condition load",
+          "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
       return false;
     }
-  } else if (HasStore) {
-    LLVM_DEBUG(dbgs() << "Found early exit store but no condition load.\n");
-    reportVectorizationFailure(
-        "Early exit loop with store but no condition load",
-        "Cannot vectorize early exit loop with store but no condition load",
-        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
-    return false;
   } else {
     // Read-only loop.
     // FIXME: as with the loops with stores, only the loads contributing to

>From de34099d3571292539e206b83227a4a2b1ff1af3 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 14 May 2025 16:27:55 +0000
Subject: [PATCH 4/4] Improve memory vectorization checks

---
 .../Vectorize/LoopVectorizationLegality.h     |  4 +-
 .../Vectorize/LoopVectorizationLegality.cpp   | 53 +++++++++----------
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index c98b50702ed66..1dae09a5ec61d 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -408,9 +408,7 @@ class LoopVectorizationLegality {
   }
 
   /// Returns true if this is an early exit loop containing a store.
-  bool isConditionCopyRequired() const {
-    return EarlyExitLoad.has_value();
-  }
+  bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); }
 
   /// Returns the load instruction, if any, nearest to an uncountable early
   /// exit.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 7fae9ef0f5828..6513069d07454 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1210,16 +1210,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     });
   }
 
-  // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
-  //        since we're (potentially) doing the load out of its normal order
-  //        in the loop and that may throw off dependency checking.
-  //        A forward dependency should be fine, but a backwards dep may not
-  //        be even if LAA thinks it is due to performing the load for the
-  //        vector iteration i+1 in vector iteration i.
-  if (isConditionCopyRequired()) {
-    assert(EarlyExitLoad.has_value() && "EE Store without condition load.");
-
-    if (LAI->canVectorizeMemory()) {
+  if (LAI->canVectorizeMemory()) {
+    // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+    //        since we're (potentially) doing the load out of its normal order
+    //        in the loop and that may throw off dependency checking.
+    //        A forward dependency should be fine, but a backwards dep may not
+    //        be, even if LAA thinks it is, because the load for vector
+    //        iteration i+1 is performed in vector iteration i.
+    if (isConditionCopyRequired()) {
       const MemoryDepChecker &DepChecker = LAI->getDepChecker();
       const auto *Deps = DepChecker.getDependences();
 
@@ -1238,9 +1236,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
         }
       }
     }
-  }
-
-  if (!LAI->canVectorizeMemory())
+  } else if (!isConditionCopyRequired())
     return canVectorizeIndirectUnsafeDependences();
 
   if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
@@ -1783,7 +1779,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   if (HasStore) {
     // Record load for analysis by isDereferenceableAndAlignedInLoop
     // and later by dependence analysis.
-    if (BranchInst *Br = dyn_cast<BranchInst>(SingleUncountableEdge->first->getTerminator())) {
+    if (BranchInst *Br = dyn_cast<BranchInst>(
+            SingleUncountableEdge->first->getTerminator())) {
       // FIXME: Handle exit conditions with multiple users, more complex exit
       //        conditions than br(icmp(load, loop_inv)).
       ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
@@ -1791,8 +1788,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
           TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
         LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
         if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
-          if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
-                                                &Predicates)) {
+          if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
+                                                *DT, AC, &Predicates)) {
             ICFLoopSafetyInfo SafetyInfo;
             SafetyInfo.computeLoopSafetyInfo(TheLoop);
             // FIXME: We may have multiple levels of conditional loads, so will
@@ -1801,20 +1798,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
               EELoad = Load;
             } else {
               LLVM_DEBUG(
-              dbgs() << "Early exit condition load not guaranteed to execute.\n");
+                  dbgs()
+                  << "Early exit condition load not guaranteed to execute.\n");
               reportVectorizationFailure(
-              "Early exit condition load not guaranteed to execute",
-              "Cannot vectorize early exit loop when condition load is not "
-              "guaranteed to execute",
-              "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+                  "Early exit condition load not guaranteed to execute",
+                  "Cannot vectorize early exit loop when condition load is not "
+                  "guaranteed to execute",
+                  "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
             }
           } else {
-            LLVM_DEBUG(dbgs() << "Early exit condition load potentially unsafe.\n");
-            reportVectorizationFailure("Uncounted loop condition not known safe",
-             "Cannot vectorize early exit loop with "
-             "possibly unsafe condition load",
-             "PotentiallyFaultingEarlyExitLoop", ORE,
-             TheLoop);
+            LLVM_DEBUG(dbgs()
+                       << "Early exit condition load potentially unsafe.\n");
+            reportVectorizationFailure(
+                "Uncounted loop condition not known safe",
+                "Cannot vectorize early exit loop with "
+                "possibly unsafe condition load",
+                "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
             return false;
           }
         }



More information about the llvm-commits mailing list