[llvm] [LV] Add initial legality checks for early exit loops with side effects (PR #145663)

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 05:12:38 PDT 2025


https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/145663

>From 3c6f7fe2e4910f172425847ed56169240b749618 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 24 Jun 2025 14:13:19 +0000
Subject: [PATCH 01/18] [LV] Add initial legality checks for ee loops with
 stores

---
 .../Vectorize/LoopVectorizationLegality.h     |  18 +++
 .../Vectorize/LoopVectorizationLegality.cpp   | 121 +++++++++++++++++-
 .../Transforms/LoopVectorize/control-flow.ll  |   2 +-
 .../early_exit_store_legality.ll              |  20 +--
 4 files changed, 145 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 60f92735055bc..85438aaa56c66 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -407,6 +407,14 @@ class LoopVectorizationLegality {
     return UncountableExitingBB;
   }
 
+  /// Returns true if this is an early exit loop containing a store.
+  bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); }
+
+  /// Returns the load instruction, if any, directly used for an exit comparison
+  /// in and early exit loop containing state-changing or potentially-faulting
+  /// operations.
+  std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
+
   /// Return true if there is store-load forwarding dependencies.
   bool isSafeForAnyStoreLoadForwardDistances() const {
     return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
@@ -538,6 +546,12 @@ class LoopVectorizationLegality {
   /// additional cases safely.
   bool isVectorizableEarlyExitLoop();
 
+  /// Clears any current early exit data gathered if a check failed.
+  void clearEarlyExitData() {
+    UncountableExitingBB = nullptr;
+    EarlyExitLoad = std::nullopt;
+  }
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -659,6 +673,10 @@ class LoopVectorizationLegality {
   /// Keep track of an uncountable exiting block, if there is exactly one early
   /// exit.
   BasicBlock *UncountableExitingBB = nullptr;
+
+  /// Keep track of the load used for early exits where state-changing or
+  /// potentially faulting operations occur inside the loop.
+  std::optional<LoadInst *> EarlyExitLoad;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 528ae3822d078..5b11cca7a30f4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1223,8 +1224,42 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     });
   }
 
-  if (!LAI->canVectorizeMemory())
-    return canVectorizeIndirectUnsafeDependences();
+  if (LAI->canVectorizeMemory()) {
+    // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+    //        since we're (potentially) doing the load out of its normal order
+    //        in the loop and that may throw off dependency checking.
+    //        A forward dependency should be fine, but a backwards dep may not
+    //        be even if LAA thinks it is due to performing the load for the
+    //        vector iteration i+1 in vector iteration i.
+    if (isConditionCopyRequired()) {
+      const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+      const auto *Deps = DepChecker.getDependences();
+
+      for (const MemoryDepChecker::Dependence &Dep : *Deps) {
+        if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
+            Dep.getSource(DepChecker) == EarlyExitLoad) {
+          // Refine language a little? This currently only applies when a store
+          // is present in the early exit loop.
+          reportVectorizationFailure(
+              "No dependencies allowed for early exit condition load",
+              "Early exit condition loads may not have a dependence with "
+              "another"
+              " memory operation.",
+              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+          return false;
+        }
+      }
+    }
+  } else {
+    if (!isConditionCopyRequired())
+      return canVectorizeIndirectUnsafeDependences();
+    reportVectorizationFailure(
+        "Cannot vectorize unsafe dependencies in state-changing early exit "
+        "loop.",
+        "Unable to vectorize memory in an early exit loop with store",
+        "CantVectorizeUnsafeDependencyForEELoopWithStore", ORE, TheLoop);
+    return false;
+  }
 
   if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
     reportVectorizationFailure("We don't allow storing to uniform addresses",
@@ -1755,16 +1790,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
+  bool HasStore = false;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        HasStore = true;
+        if (SI->isSimple())
+          continue;
+
+        reportVectorizationFailure(
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
+            "WritesInEarlyExitLoop", ORE, TheLoop);
+        return false;
+      }
+
       if (I.mayWriteToMemory()) {
         // We don't support writes to memory.
         reportVectorizationFailure(
-            "Writes to memory unsupported in early exit loops",
-            "Cannot vectorize early exit loop with writes to memory",
+            "Complex writes to memory unsupported in early exit loops",
+            "Cannot vectorize early exit loop with complex writes to memory",
             "WritesInEarlyExitLoop", ORE, TheLoop);
         return false;
-      } else if (!IsSafeOperation(&I)) {
+      }
+
+      if (!IsSafeOperation(&I)) {
         reportVectorizationFailure("Early exit loop contains operations that "
                                    "cannot be speculatively executed",
                                    "UnsafeOperationsEarlyExitLoop", ORE,
@@ -1778,6 +1828,57 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
          "Expected latch predecessor to be the early exiting block");
 
   Predicates.clear();
+  if (HasStore) {
+    // Record load for analysis by isDereferenceableAndAlignedInLoop
+    // and later by dependence analysis.
+    if (BranchInst *Br = dyn_cast<BranchInst>(
+            SingleUncountableExitingBlock->getTerminator())) {
+      // FIXME: Handle exit conditions with multiple users, more complex exit
+      //        conditions than br(icmp(load, loop_inv)).
+      ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+      if (Cmp && Cmp->hasOneUse() &&
+          TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+        LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+        if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
+          if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
+                                                *DT, AC, &Predicates)) {
+            ICFLoopSafetyInfo SafetyInfo;
+            SafetyInfo.computeLoopSafetyInfo(TheLoop);
+            // FIXME: We may have multiple levels of conditional loads, so will
+            //        need to improve on outright rejection at some point.
+            if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+              EarlyExitLoad = Load;
+            } else {
+              reportVectorizationFailure(
+                  "Early exit condition load not guaranteed to execute",
+                  "Cannot vectorize early exit loop when condition load is not "
+                  "guaranteed to execute",
+                  "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+            }
+          } else {
+            reportVectorizationFailure(
+                "Uncounted loop condition not known safe",
+                "Cannot vectorize early exit loop with "
+                "possibly unsafe condition load",
+                "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+            return false;
+          }
+        }
+      }
+    }
+
+    if (!EarlyExitLoad) {
+      reportVectorizationFailure(
+          "Early exit loop with store but no condition load",
+          "Cannot vectorize early exit loop with store but no condition load",
+          "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
+  } else {
+    // Read-only loop.
+    // FIXME: as with the loops with stores, only the loads contributing to
+    //        the loop condition need to be guaranteed dereferenceable and
+    //        aligned.
   SmallVector<LoadInst *, 4> NonDerefLoads;
   if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
                       &Predicates)) {
@@ -1886,6 +1987,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       if (!isVectorizableEarlyExitLoop()) {
         assert(!hasUncountableEarlyExit() &&
                "Must be false without vectorizable early-exit loop");
+        clearEarlyExitData();
         if (DoExtraAnalysis)
           Result = false;
         else
@@ -1903,6 +2005,15 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  // Bail out for state-changing EE loops for now.
+  if (EarlyExitLoad) {
+    reportVectorizationFailure(
+        "Writes to memory unsupported in early exit loops",
+        "Cannot vectorize early exit loop with writes to memory",
+        "WritesInEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
   if (Result) {
     LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                       << (LAI->getRuntimePointerChecking()->Need
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 3a8aec34dfe43..2578260fe878d 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
 ;   return 0;
 ; }
 
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with writes to memory
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
 ; CHECK: remark: source.cpp:5:9: loop not vectorized
 
 ; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 84d5ceeb601b6..71657b9cfc6a0 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -3,7 +3,7 @@
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   %p1 = alloca [1024 x i8]
   call void @init_mem(ptr %p1, i64 1024)
@@ -56,7 +56,7 @@ exit:
 
 define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   br label %for.body
 
@@ -80,7 +80,7 @@ exit:
 
 define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
 entry:
   br label %for.body
 
@@ -106,7 +106,7 @@ exit:
 
 define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: No dependencies allowed for early exit condition load.
 entry:
   %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
   br label %for.body
@@ -135,7 +135,7 @@ exit:
 
 define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
 entry:
   %unknown.offset = call i64 @get_an_unknown_offset()
   %unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -149,10 +149,10 @@ for.body:
   %data = load i16, ptr %st.addr, align 2
   %inc = add nsw i16 %data, 1
   store i16 %inc, ptr %st.addr, align 2
-  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
   %ee.val = load i16, ptr %ee.addr, align 2
   %ee.cond = icmp sgt i16 %ee.val, 500
-  %some.addr = getelementptr inbounds nuw i16, ptr %unknown.base, i64 %iv
+  %some.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
   store i16 42, ptr %some.addr, align 2
   br i1 %ee.cond, label %exit, label %for.inc
 
@@ -223,7 +223,7 @@ exit:
 
 define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
 entry:
   br label %for.body
 
@@ -249,7 +249,7 @@ exit:
 
 define void @loop_contains_store_volatile(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_volatile'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Complex writes to memory unsupported in early exit loops.
 entry:
   br label %for.body
 
@@ -353,7 +353,7 @@ exit:
 
 define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
 entry:
   br label %for.body
 

>From 255fdb60c3750fa4f6e237a26fd3d09f3c79720a Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 18 Jul 2025 15:53:58 +0000
Subject: [PATCH 02/18] Remove load tracking from LVL class, make it a local
 passed as needed; rename state variable and accessor to something covering
 more cases; simplify some code following suggestions; share a remark
 when specific subcases aren't interesting; rebase

---
 .../Vectorize/LoopVectorizationLegality.h     | 26 +++----
 .../Vectorize/LoopVectorizationLegality.cpp   | 76 +++++++++----------
 .../Transforms/LoopVectorize/control-flow.ll  |  2 +-
 .../early_exit_store_legality.ll              |  8 +-
 4 files changed, 53 insertions(+), 59 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 85438aaa56c66..a71563bd51d7f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -407,13 +407,13 @@ class LoopVectorizationLegality {
     return UncountableExitingBB;
   }
 
-  /// Returns true if this is an early exit loop containing a store.
-  bool isConditionCopyRequired() const { return EarlyExitLoad.has_value(); }
-
-  /// Returns the load instruction, if any, directly used for an exit comparison
-  /// in and early exit loop containing state-changing or potentially-faulting
-  /// operations.
-  std::optional<LoadInst *> getEarlyExitLoad() const { return EarlyExitLoad; }
+  /// Returns true if this is an early exit loop with state-changing or
+  /// potentially-faulting operations and the IR representing the condition
+  /// for the uncounted exit must be determined before any of the state changes
+  /// or potentially faulting operations take place.
+  bool hasUncountedExitWithSideEffects() const {
+    return UncountedExitWithSideEffects;
+  }
 
   /// Return true if there is store-load forwarding dependencies.
   bool isSafeForAnyStoreLoadForwardDistances() const {
@@ -514,7 +514,7 @@ class LoopVectorizationLegality {
   /// we read and write from memory. This method checks if it is
   /// legal to vectorize the code, considering only memory constrains.
   /// Returns true if the loop is vectorizable
-  bool canVectorizeMemory();
+  bool canVectorizeMemory(std::optional<LoadInst *>);
 
   /// If LAA cannot determine whether all dependences are safe, we may be able
   /// to further analyse some IndirectUnsafe dependences and if they match a
@@ -544,12 +544,12 @@ class LoopVectorizationLegality {
   /// The list above is not based on theoretical limitations of vectorization,
   /// but simply a statement that more work is needed to support these
   /// additional cases safely.
-  bool isVectorizableEarlyExitLoop();
+  bool isVectorizableEarlyExitLoop(std::optional<LoadInst *> &);
 
   /// Clears any current early exit data gathered if a check failed.
   void clearEarlyExitData() {
     UncountableExitingBB = nullptr;
-    EarlyExitLoad = std::nullopt;
+    UncountedExitWithSideEffects = false;
   }
 
   /// Return true if all of the instructions in the block can be speculatively
@@ -674,9 +674,9 @@ class LoopVectorizationLegality {
   /// exit.
   BasicBlock *UncountableExitingBB = nullptr;
 
-  /// Keep track of the load used for early exits where state-changing or
-  /// potentially faulting operations occur inside the loop.
-  std::optional<LoadInst *> EarlyExitLoad;
+  /// If true, the loop has at least one uncounted exit and operations within
+  /// the loop may have observable side effects.
+  bool UncountedExitWithSideEffects = false;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 5b11cca7a30f4..ff143098ec837 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1214,7 +1214,8 @@ bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
   return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
 }
 
-bool LoopVectorizationLegality::canVectorizeMemory() {
+bool LoopVectorizationLegality::canVectorizeMemory(
+    std::optional<LoadInst *> CriticalEELoad) {
   LAI = &LAIs.getInfo(*TheLoop);
   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
   if (LAR) {
@@ -1231,27 +1232,27 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     //        A forward dependency should be fine, but a backwards dep may not
     //        be even if LAA thinks it is due to performing the load for the
     //        vector iteration i+1 in vector iteration i.
-    if (isConditionCopyRequired()) {
+    if (CriticalEELoad) {
       const MemoryDepChecker &DepChecker = LAI->getDepChecker();
       const auto *Deps = DepChecker.getDependences();
 
-      for (const MemoryDepChecker::Dependence &Dep : *Deps) {
-        if (Dep.getDestination(DepChecker) == EarlyExitLoad ||
-            Dep.getSource(DepChecker) == EarlyExitLoad) {
-          // Refine language a little? This currently only applies when a store
-          // is present in the early exit loop.
-          reportVectorizationFailure(
-              "No dependencies allowed for early exit condition load",
-              "Early exit condition loads may not have a dependence with "
-              "another"
-              " memory operation.",
-              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
-          return false;
-        }
+      if (any_of(*Deps, [&](const MemoryDepChecker::Dependence &Dep) {
+            return (Dep.getDestination(DepChecker) == *CriticalEELoad ||
+                    Dep.getSource(DepChecker) == *CriticalEELoad);
+          })) {
+        // Refine language a little? This currently only applies when a store
+        // is present in the early exit loop.
+        reportVectorizationFailure(
+            "No dependencies allowed for early exit condition load",
+            "Early exit condition loads may not have a dependence with "
+            "another"
+            " memory operation.",
+            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+        return false;
       }
     }
   } else {
-    if (!isConditionCopyRequired())
+    if (!hasUncountedExitWithSideEffects())
       return canVectorizeIndirectUnsafeDependences();
     reportVectorizationFailure(
         "Cannot vectorize unsafe dependencies in state-changing early exit "
@@ -1695,7 +1696,8 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   return Result;
 }
 
-bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
+bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
+    std::optional<LoadInst *> &CriticalEELoad) {
   BasicBlock *LatchBB = TheLoop->getLoopLatch();
   if (!LatchBB) {
     reportVectorizationFailure("Loop does not have a latch",
@@ -1790,22 +1792,14 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
-  bool HasStore = false;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
-      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
-        HasStore = true;
-        if (SI->isSimple())
+      if (I.mayWriteToMemory()) {
+        if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
+          UncountedExitWithSideEffects = true;
           continue;
+        }
 
-        reportVectorizationFailure(
-            "Complex writes to memory unsupported in early exit loops",
-            "Cannot vectorize early exit loop with complex writes to memory",
-            "WritesInEarlyExitLoop", ORE, TheLoop);
-        return false;
-      }
-
-      if (I.mayWriteToMemory()) {
         // We don't support writes to memory.
         reportVectorizationFailure(
             "Complex writes to memory unsupported in early exit loops",
@@ -1828,7 +1822,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
          "Expected latch predecessor to be the early exiting block");
 
   Predicates.clear();
-  if (HasStore) {
+  if (UncountedExitWithSideEffects) {
     // Record load for analysis by isDereferenceableAndAlignedInLoop
     // and later by dependence analysis.
     if (BranchInst *Br = dyn_cast<BranchInst>(
@@ -1846,20 +1840,18 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
             SafetyInfo.computeLoopSafetyInfo(TheLoop);
             // FIXME: We may have multiple levels of conditional loads, so will
             //        need to improve on outright rejection at some point.
-            if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
-              EarlyExitLoad = Load;
-            } else {
+            if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
+              CriticalEELoad = Load;
+            else
               reportVectorizationFailure(
                   "Early exit condition load not guaranteed to execute",
                   "Cannot vectorize early exit loop when condition load is not "
                   "guaranteed to execute",
                   "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
-            }
           } else {
             reportVectorizationFailure(
-                "Uncounted loop condition not known safe",
-                "Cannot vectorize early exit loop with "
-                "possibly unsafe condition load",
+                "Loop may fault",
+                "Cannot vectorize potentially faulting early exit loop",
                 "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
             return false;
           }
@@ -1867,7 +1859,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
       }
     }
 
-    if (!EarlyExitLoad) {
+    if (!CriticalEELoad) {
       reportVectorizationFailure(
           "Early exit loop with store but no condition load",
           "Cannot vectorize early exit loop with store but no condition load",
@@ -1975,6 +1967,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
+  std::optional<LoadInst *> CriticalEarlyExitUncountedConditionLoad;
   if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
     if (TheLoop->getExitingBlock()) {
       reportVectorizationFailure("Cannot vectorize uncountable loop",
@@ -1984,10 +1977,11 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       else
         return false;
     } else {
-      if (!isVectorizableEarlyExitLoop()) {
+      if (!isVectorizableEarlyExitLoop(CriticalEarlyExitUncountedConditionLoad)) {
         assert(!hasUncountableEarlyExit() &&
                "Must be false without vectorizable early-exit loop");
         clearEarlyExitData();
+        CriticalEarlyExitUncountedConditionLoad.reset();
         if (DoExtraAnalysis)
           Result = false;
         else
@@ -1997,7 +1991,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   }
 
   // Go over each instruction and look at memory deps.
-  if (!canVectorizeMemory()) {
+  if (!canVectorizeMemory(CriticalEarlyExitUncountedConditionLoad)) {
     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
     if (DoExtraAnalysis)
       Result = false;
@@ -2006,7 +2000,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   }
 
   // Bail out for state-changing EE loops for now.
-  if (EarlyExitLoad) {
+  if (UncountedExitWithSideEffects) {
     reportVectorizationFailure(
         "Writes to memory unsupported in early exit loops",
         "Cannot vectorize early exit loop with writes to memory",
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index 2578260fe878d..61836e4a29d58 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -10,7 +10,7 @@
 ;   return 0;
 ; }
 
-; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize early exit loop with possibly unsafe condition load
+; CHECK: remark: source.cpp:5:9: loop not vectorized: Cannot vectorize potentially faulting early exit loop
 ; CHECK: remark: source.cpp:5:9: loop not vectorized
 
 ; CHECK: _Z4testPii
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 71657b9cfc6a0..7f82f9d7572db 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -135,7 +135,7 @@ exit:
 
 define void @loop_contains_store_unsafe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(80) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unsafe_dependency'
-; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
+; CHECK:       LV: Not vectorizing: Loop may fault.
 entry:
   %unknown.offset = call i64 @get_an_unknown_offset()
   %unknown.cmp = icmp ult i64 %unknown.offset, 20
@@ -167,7 +167,7 @@ exit:
 
 define void @loop_contains_store_assumed_bounds(ptr noalias %array, ptr readonly %pred, i32 %n) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_assumed_bounds'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Loop may fault.
 entry:
   %n_bytes = mul nuw nsw i32 %n, 2
   call void @llvm.assume(i1 true) [ "align"(ptr %pred, i64 2), "dereferenceable"(ptr %pred, i32 %n_bytes) ]
@@ -223,7 +223,7 @@ exit:
 
 define void @loop_contains_store_unknown_bounds(ptr align 2 dereferenceable(100) noalias %array, ptr align 2 dereferenceable(100) readonly %pred, i64 %n) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_unknown_bounds'
-; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
+; CHECK:       LV: Not vectorizing: Loop may fault.
 entry:
   br label %for.body
 
@@ -353,7 +353,7 @@ exit:
 
 define void @loop_contains_store_condition_load_is_chained(ptr dereferenceable(40) noalias %array, ptr align 8 dereferenceable(160) readonly %offsets, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_is_chained'
-; CHECK:       LV: Not vectorizing: Uncounted loop condition not known safe.
+; CHECK:       LV: Not vectorizing: Loop may fault.
 entry:
   br label %for.body
 

>From b991d44edc650dfb14b0dc86571169cb729f8dcd Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 31 Jul 2025 14:57:35 +0000
Subject: [PATCH 03/18] Name new parameter in function prototypes

---
 .../llvm/Transforms/Vectorize/LoopVectorizationLegality.h   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index a71563bd51d7f..d540b9c99cfe1 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -514,7 +514,8 @@ class LoopVectorizationLegality {
   /// we read and write from memory. This method checks if it is
   /// legal to vectorize the code, considering only memory constrains.
   /// Returns true if the loop is vectorizable
-  bool canVectorizeMemory(std::optional<LoadInst *>);
+  bool canVectorizeMemory(
+      std::optional<LoadInst *> CriticalEarlyExitUncountedConditionLoad);
 
   /// If LAA cannot determine whether all dependences are safe, we may be able
   /// to further analyse some IndirectUnsafe dependences and if they match a
@@ -544,7 +545,8 @@ class LoopVectorizationLegality {
   /// The list above is not based on theoretical limitations of vectorization,
   /// but simply a statement that more work is needed to support these
   /// additional cases safely.
-  bool isVectorizableEarlyExitLoop(std::optional<LoadInst *> &);
+  bool isVectorizableEarlyExitLoop(
+      std::optional<LoadInst *> &CriticalEarlyExitUncountedConditionLoad);
 
   /// Clears any current early exit data gathered if a check failed.
   void clearEarlyExitData() {

>From 7cae713754f9f8cfa15503a7f147d616f184b49a Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 31 Jul 2025 15:10:44 +0000
Subject: [PATCH 04/18] Improve remarks, remove contentious FIXME

---
 .../Vectorize/LoopVectorizationLegality.cpp   | 20 +++++++++----------
 .../early_exit_store_legality.ll              |  2 +-
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ff143098ec837..9e00b4b9e0216 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1240,14 +1240,12 @@ bool LoopVectorizationLegality::canVectorizeMemory(
             return (Dep.getDestination(DepChecker) == *CriticalEELoad ||
                     Dep.getSource(DepChecker) == *CriticalEELoad);
           })) {
-        // Refine language a little? This currently only applies when a store
-        // is present in the early exit loop.
         reportVectorizationFailure(
-            "No dependencies allowed for early exit condition load",
-            "Early exit condition loads may not have a dependence with "
-            "another"
-            " memory operation.",
-            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
+            "No dependencies allowed for critical early exit condition load "
+            "in a loop with side effects",
+            "Critical Early exit condition loads in a loop with side effects "
+            "may not have a dependence with another memory operation.",
+            "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
         return false;
       }
     }
@@ -1255,10 +1253,10 @@ bool LoopVectorizationLegality::canVectorizeMemory(
     if (!hasUncountedExitWithSideEffects())
       return canVectorizeIndirectUnsafeDependences();
     reportVectorizationFailure(
-        "Cannot vectorize unsafe dependencies in state-changing early exit "
-        "loop.",
-        "Unable to vectorize memory in an early exit loop with store",
-        "CantVectorizeUnsafeDependencyForEELoopWithStore", ORE, TheLoop);
+        "Cannot vectorize unsafe dependencies in early exit loop with "
+        "side effects.",
+        "Unable to vectorize memory in an early exit loop with side effects",
+        "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
     return false;
   }
 
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 7f82f9d7572db..fc8a4112c1096 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -106,7 +106,7 @@ exit:
 
 define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; CHECK:       LV: Not vectorizing: No dependencies allowed for early exit condition load.
+; CHECK:       LV: Not vectorizing: No dependencies allowed for critical early exit condition load in a loop with side effects.
 entry:
   %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
   br label %for.body

>From 70769dec0911f6ff1835e06e856856b214c4d70f Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 31 Jul 2025 15:33:05 +0000
Subject: [PATCH 05/18] Add FIXME for comparison operator ordering assumption

---
 llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 9e00b4b9e0216..14438e7a265e1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1827,6 +1827,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
             SingleUncountableExitingBlock->getTerminator())) {
       // FIXME: Handle exit conditions with multiple users, more complex exit
       //        conditions than br(icmp(load, loop_inv)).
+      // FIXME: Don't rely on operand ordering for the comparison.
       ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
       if (Cmp && Cmp->hasOneUse() &&
           TheLoop->isLoopInvariant(Cmp->getOperand(1))) {

>From 553cc9313c2e5ecbe252a502753b016417a97afe Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 31 Jul 2025 15:54:30 +0000
Subject: [PATCH 06/18] Added test with a gather load for the uncounted exit
 condition

---
 .../early_exit_store_legality.ll              | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index fc8a4112c1096..55109f7d4cf40 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -405,5 +405,34 @@ exit:
   ret void
 }
 
+define void @loop_contains_store_condition_load_requires_gather(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(512) readonly %pred, ptr align 1 dereferenceable(20) readonly %offsets) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_condition_load_requires_gather'
+; CHECK:       LV: Not vectorizing: Loop may fault.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %offset.addr = getelementptr inbounds nuw i8, ptr %offsets, i64 %iv
+  %offset = load i8, ptr %offset.addr, align 1
+  %offset.zext = zext i8 %offset to i64
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %offset.zext
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare void @init_mem(ptr, i64);
 declare i64 @get_an_unknown_offset();

>From a6189e257d87a1b1e55a5deafdcde01ed7b8a8e3 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 31 Jul 2025 16:23:51 +0000
Subject: [PATCH 07/18] Added test with a switch for the uncounted exit

---
 .../early_exit_store_legality.ll              | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 55109f7d4cf40..144161cd2668f 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -434,5 +434,30 @@ exit:
   ret void
 }
 
+define void @loop_contains_store_uncounted_exit_is_a_switch(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_uncounted_exit_is_a_switch'
+; CHECK:       LV: Not vectorizing: Loop contains an unsupported switch
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  switch i16 %ee.val, label %for.inc [ i16 500, label %exit ]
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare void @init_mem(ptr, i64);
 declare i64 @get_an_unknown_offset();

>From 3bb93d2de5a7e45698531b726a572e9d829fa98b Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 31 Jul 2025 16:28:53 +0000
Subject: [PATCH 08/18] Added remark for non-branch terminator on uncounted
 exit

---
 .../lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 14438e7a265e1..d14132951e1de 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1856,6 +1856,13 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
           }
         }
       }
+    } else {
+      reportVectorizationFailure(
+        "Unsupported control flow in early exit loop with side effects",
+        "Cannot find branch instruction for uncounted exit in early exit loop "
+        "with side effects",
+        "UnsupportedUncountedExitTerminator", ORE, TheLoop);
+      return false;
     }
 
     if (!CriticalEELoad) {

>From 231d17af54fc95f5f0542a34cb0185e3ebc87c40 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 5 Aug 2025 13:20:57 +0000
Subject: [PATCH 09/18] Initialize LAI earlier if we have an EE loop with side
 effects

---
 .../Vectorize/LoopVectorizationLegality.h     | 14 ++--
 .../Vectorize/LoopVectorizationLegality.cpp   | 80 +++++++++++--------
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d540b9c99cfe1..f183fd8d57452 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -514,8 +514,7 @@ class LoopVectorizationLegality {
   /// we read and write from memory. This method checks if it is
   /// legal to vectorize the code, considering only memory constrains.
   /// Returns true if the loop is vectorizable
-  bool canVectorizeMemory(
-      std::optional<LoadInst *> CriticalEarlyExitUncountedConditionLoad);
+  bool canVectorizeMemory();
 
   /// If LAA cannot determine whether all dependences are safe, we may be able
   /// to further analyse some IndirectUnsafe dependences and if they match a
@@ -533,20 +532,21 @@ class LoopVectorizationLegality {
   /// Returns true if this is an early exit loop that can be vectorized.
   /// Currently, a loop with an uncountable early exit is considered
   /// vectorizable if:
-  ///   1. There are no writes to memory in the loop.
+  ///   1. Writes to memory do not form a dependence with any load used as
+  ///      part of the uncounted exit condition.
   ///   2. The loop has only one early uncountable exit
   ///   3. The early exit block dominates the latch block.
   ///   4. The latch block has an exact exit count.
   ///   5. The loop does not contain reductions or recurrences.
   ///   6. We can prove at compile-time that loops will not contain faulting
-  ///   loads.
+  ///      loads, or that any faulting loads would also occur in a purely
+  ///      scalar loop.
   ///   7. It is safe to speculatively execute instructions such as divide or
-  ///   call instructions.
+  ///      call instructions.
   /// The list above is not based on theoretical limitations of vectorization,
   /// but simply a statement that more work is needed to support these
   /// additional cases safely.
-  bool isVectorizableEarlyExitLoop(
-      std::optional<LoadInst *> &CriticalEarlyExitUncountedConditionLoad);
+  bool isVectorizableEarlyExitLoop();
 
   /// Clears any current early exit data gathered if a check failed.
   void clearEarlyExitData() {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index d14132951e1de..44472d9bc3706 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1214,8 +1214,7 @@ bool LoopVectorizationLegality::canVectorizeIndirectUnsafeDependences() {
   return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
 }
 
-bool LoopVectorizationLegality::canVectorizeMemory(
-    std::optional<LoadInst *> CriticalEELoad) {
+bool LoopVectorizationLegality::canVectorizeMemory() {
   LAI = &LAIs.getInfo(*TheLoop);
   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
   if (LAR) {
@@ -1225,31 +1224,7 @@ bool LoopVectorizationLegality::canVectorizeMemory(
     });
   }
 
-  if (LAI->canVectorizeMemory()) {
-    // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
-    //        since we're (potentially) doing the load out of its normal order
-    //        in the loop and that may throw off dependency checking.
-    //        A forward dependency should be fine, but a backwards dep may not
-    //        be even if LAA thinks it is due to performing the load for the
-    //        vector iteration i+1 in vector iteration i.
-    if (CriticalEELoad) {
-      const MemoryDepChecker &DepChecker = LAI->getDepChecker();
-      const auto *Deps = DepChecker.getDependences();
-
-      if (any_of(*Deps, [&](const MemoryDepChecker::Dependence &Dep) {
-            return (Dep.getDestination(DepChecker) == *CriticalEELoad ||
-                    Dep.getSource(DepChecker) == *CriticalEELoad);
-          })) {
-        reportVectorizationFailure(
-            "No dependencies allowed for critical early exit condition load "
-            "in a loop with side effects",
-            "Critical Early exit condition loads in a loop with side effects "
-            "may not have a dependence with another memory operation.",
-            "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
-        return false;
-      }
-    }
-  } else {
+  if (!LAI->canVectorizeMemory()) {
     if (!hasUncountedExitWithSideEffects())
       return canVectorizeIndirectUnsafeDependences();
     reportVectorizationFailure(
@@ -1694,8 +1669,7 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   return Result;
 }
 
-bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
-    std::optional<LoadInst *> &CriticalEELoad) {
+bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   BasicBlock *LatchBB = TheLoop->getLoopLatch();
   if (!LatchBB) {
     reportVectorizationFailure("Loop does not have a latch",
@@ -1820,6 +1794,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
          "Expected latch predecessor to be the early exiting block");
 
   Predicates.clear();
+  LoadInst *CriticalUncountedExitConditionLoad = nullptr;
   if (UncountedExitWithSideEffects) {
     // Record load for analysis by isDereferenceableAndAlignedInLoop
     // and later by dependence analysis.
@@ -1840,7 +1815,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
             // FIXME: We may have multiple levels of conditional loads, so will
             //        need to improve on outright rejection at some point.
             if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
-              CriticalEELoad = Load;
+              CriticalUncountedExitConditionLoad = Load;
             else
               reportVectorizationFailure(
                   "Early exit condition load not guaranteed to execute",
@@ -1865,7 +1840,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
       return false;
     }
 
-    if (!CriticalEELoad) {
+    if (!CriticalUncountedExitConditionLoad) {
       reportVectorizationFailure(
           "Early exit loop with store but no condition load",
           "Cannot vectorize early exit loop with store but no condition load",
@@ -1902,6 +1877,43 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop(
                       << "\n");
   }
 
+  // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
+  //        since we're (potentially) doing the load out of its normal order
+  //        in the loop and that may throw off dependency checking.
+  //        A forward dependency should be fine, but a backwards dep may not
+  //        be even if LAA thinks it is due to performing the load for the
+  //        vector iteration i+1 in vector iteration i.
+  if (CriticalUncountedExitConditionLoad) {
+    LAI = &LAIs.getInfo(*TheLoop);
+    const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+    const auto *Deps = DepChecker.getDependences();
+    if (!Deps) {
+      reportVectorizationFailure(
+          "Invalid memory dependencies result",
+          "Unable to determine memory dependencies for an early exit loop with "
+          "side effects.",
+          "CantVectorizeInvalidDependencesForEELoopsWithSideEffects", ORE,
+          TheLoop);
+      return false;
+    }
+
+    if (any_of(*Deps, [&](const MemoryDepChecker::Dependence &Dep) {
+          return (Dep.getDestination(DepChecker) ==
+                      CriticalUncountedExitConditionLoad ||
+                  Dep.getSource(DepChecker) ==
+                      CriticalUncountedExitConditionLoad);
+        })) {
+      reportVectorizationFailure(
+          "No dependencies allowed for critical early exit condition load "
+          "in a loop with side effects",
+          "Critical Early exit condition loads in a loop with side effects "
+          "may not have a dependence with another memory operation.",
+          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
+          TheLoop);
+      return false;
+    }
+  }
+
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
   // Since we have an exact exit count for the latch and the early exit
@@ -1973,7 +1985,6 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  std::optional<LoadInst *> CriticalEarlyExitUncountedConditionLoad;
   if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
     if (TheLoop->getExitingBlock()) {
       reportVectorizationFailure("Cannot vectorize uncountable loop",
@@ -1983,11 +1994,10 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       else
         return false;
     } else {
-      if (!isVectorizableEarlyExitLoop(CriticalEarlyExitUncountedConditionLoad)) {
+      if (!isVectorizableEarlyExitLoop()) {
         assert(!hasUncountableEarlyExit() &&
                "Must be false without vectorizable early-exit loop");
         clearEarlyExitData();
-        CriticalEarlyExitUncountedConditionLoad.reset();
         if (DoExtraAnalysis)
           Result = false;
         else
@@ -1997,7 +2007,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   }
 
   // Go over each instruction and look at memory deps.
-  if (!canVectorizeMemory(CriticalEarlyExitUncountedConditionLoad)) {
+  if (!canVectorizeMemory()) {
     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
     if (DoExtraAnalysis)
       Result = false;

>From 4e5d4c21477aaa8f95b732b80209714f6548d23a Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 5 Aug 2025 14:44:02 +0000
Subject: [PATCH 10/18] Add maxdeps=1 test

---
 .../Transforms/LoopVectorize/early_exit_store_legality.ll   | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 144161cd2668f..d938f250c11c4 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: asserts
-; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s --check-prefixes=CHECK,NRMDEPS
+; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 -max-dependences=1 | FileCheck %s --check-prefixes=CHECK,MAXDEP1
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
@@ -106,7 +107,8 @@ exit:
 
 define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; CHECK:       LV: Not vectorizing: No dependencies allowed for critical early exit condition load in a loop with side effects.
+; NRMDEPS:     LV: Not vectorizing: No dependencies allowed for critical early exit condition load in a loop with side effects.
+; MAXDEP1:     LV: Not vectorizing: Invalid memory dependencies result.
 entry:
   %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
   br label %for.body

>From 1a9360d8c728ad272f8fd7ad4c36d90c625ffb69 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 6 Aug 2025 12:22:14 +0000
Subject: [PATCH 11/18] Separate out ee-with-side-effect checking and try to
 explain it better

---
 .../Vectorize/LoopVectorizationLegality.h     |  61 +++++
 .../Vectorize/LoopVectorizationLegality.cpp   | 218 ++++++++++--------
 2 files changed, 179 insertions(+), 100 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index f183fd8d57452..6627684e84a17 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -548,6 +548,67 @@ class LoopVectorizationLegality {
   /// additional cases safely.
   bool isVectorizableEarlyExitLoop();
 
+  /// When vectorizing an early exit loop containing side effects, we need to
+  /// determine whether an uncounted exit will be taken before any operation
+  /// that has side effects.
+  ///
+  /// Consider a loop like the following:
+  /// for (int i = 0; i < N; ++i) {
+  ///   a[i] = b[i];
+  ///   if (c[i] == 0)
+  ///     break;
+  /// }
+  ///
+  /// We have both a load and a store operation occurring before the condition
+  /// is checked for early termination. We could potentially restrict
+  /// vectorization to cases where we know all addresses are guaranteed to be
+  /// dereferenceable, which would allow the load before the condition check to
+  /// be vectorized.
+  ///
+  /// The store, however, should not execute across all lanes if early
+  /// termination occurs before the end of the vector. We must only store to the
+  /// locations that would have been stored to by a scalar loop. So we need to
+  /// know what the result of 'c[i] == 0' is before performing the vector store,
+  /// with or without masking.
+  ///
+  /// We can either do this by moving the condition load to the top of the
+  /// vector body and using the comparison to create masks for other operations
+  /// in the loop, or by looking ahead one vector iteration and bailing out to
+  /// the scalar loop if an exit would occur.
+  ///
+  /// Using the latter approach (applicable to more targets), we need to hoist
+  /// the first load (of c[0]) out of the loop then rotate the load within the
+  /// loop to the next iteration, remembering to adjust the vector trip count.
+  /// Something like the following:
+  ///
+  /// vec.ph:
+  ///   %ci.0 = load <4 x i32>, ptr %c
+  ///   %cmp.0 = icmp eq <4 x i32> %ci.0, zeroinitializer
+  ///   %any.of.0 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.0)
+  ///   br i1 %any.of.0, label %scalar.ph, label %vec.body
+  /// vec.body:
+  ///   %iv = phi...
+  ///   phi for c[i] if used elsewhere in the loop...
+  ///   other operations in the loop...
+  ///   %iv.next = add i64 %iv, 4
+  ///   %addr.next = getelementptr i32, ptr %c, i64 %iv.next
+  ///   %ci.next = load <4 x i32>, ptr %addr.next
+  ///   %cmp.next = icmp eq <4 x i32> %ci.next, zeroinitializer
+  ///   %any.of.next = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.next)
+  ///   iv.next compared with shortened vector tripcount...
+  ///   uncounted condition combined with counted condition...
+  ///   br...
+  ///
+  /// Doing this means the last few iterations will always be performed by a
+  /// scalar loop regardless of which exit is taken, and so vector iterations
+  /// will never execute a memory operation to a location that the scalar loop
+  /// would not have.
+  ///
+  /// This means we must ensure that it is safe to move the load for 'c[i]'
+  /// before other memory operations (or any other observable side effects) in
+  /// the loop.
+  bool canUncountedExitConditionLoadBeMoved(BasicBlock *ExitingBlock);
+
   /// Clears any current early exit data gathered if a check failed.
   void clearEarlyExitData() {
     UncountableExitingBB = nullptr;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 44472d9bc3706..f8f3c7fa2cac7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1772,7 +1772,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
           continue;
         }
 
-        // We don't support writes to memory.
+        // We don't support complex writes to memory.
         reportVectorizationFailure(
             "Complex writes to memory unsupported in early exit loops",
             "Cannot vectorize early exit loop with complex writes to memory",
@@ -1793,73 +1793,22 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
          "Expected latch predecessor to be the early exiting block");
 
-  Predicates.clear();
-  LoadInst *CriticalUncountedExitConditionLoad = nullptr;
-  if (UncountedExitWithSideEffects) {
-    // Record load for analysis by isDereferenceableAndAlignedInLoop
-    // and later by dependence analysis.
-    if (BranchInst *Br = dyn_cast<BranchInst>(
-            SingleUncountableExitingBlock->getTerminator())) {
-      // FIXME: Handle exit conditions with multiple users, more complex exit
-      //        conditions than br(icmp(load, loop_inv)).
-      // FIXME: Don't rely on operand ordering for the comparison.
-      ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
-      if (Cmp && Cmp->hasOneUse() &&
-          TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
-        LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
-        if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
-          if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(),
-                                                *DT, AC, &Predicates)) {
-            ICFLoopSafetyInfo SafetyInfo;
-            SafetyInfo.computeLoopSafetyInfo(TheLoop);
-            // FIXME: We may have multiple levels of conditional loads, so will
-            //        need to improve on outright rejection at some point.
-            if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
-              CriticalUncountedExitConditionLoad = Load;
-            else
-              reportVectorizationFailure(
-                  "Early exit condition load not guaranteed to execute",
-                  "Cannot vectorize early exit loop when condition load is not "
-                  "guaranteed to execute",
-                  "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
-          } else {
-            reportVectorizationFailure(
-                "Loop may fault",
-                "Cannot vectorize potentially faulting early exit loop",
-                "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
-            return false;
-          }
-        }
-      }
-    } else {
-      reportVectorizationFailure(
-        "Unsupported control flow in early exit loop with side effects",
-        "Cannot find branch instruction for uncounted exit in early exit loop "
-        "with side effects",
-        "UnsupportedUncountedExitTerminator", ORE, TheLoop);
-      return false;
-    }
-
-    if (!CriticalUncountedExitConditionLoad) {
-      reportVectorizationFailure(
-          "Early exit loop with store but no condition load",
-          "Cannot vectorize early exit loop with store but no condition load",
-          "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
-      return false;
-    }
-  } else {
-    // Read-only loop.
-    // FIXME: as with the loops with stores, only the loads contributing to
-    //        the loop condition need to be guaranteed dereferenceable and
-    //        aligned.
   SmallVector<LoadInst *, 4> NonDerefLoads;
-  if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
+  // TODO: Handle loops that may fault.
+  if (!UncountedExitWithSideEffects) {
+    // Read-only loop.
+    Predicates.clear();
+    if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
                       &Predicates)) {
-    reportVectorizationFailure("Loop may fault",
-                               "Cannot vectorize non-read-only early exit loop",
-                               "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+      reportVectorizationFailure("Loop may fault",
+                                 "Cannot vectorize non-read-only early exit loop",
+                                 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
+  } else if (!canUncountedExitConditionLoadBeMoved(
+                 SingleUncountableExitingBlock))
     return false;
-  }
+
   // Check non-dereferenceable loads if any.
   for (LoadInst *LI : NonDerefLoads) {
     // Only support unit-stride access for now.
@@ -1877,42 +1826,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                       << "\n");
   }
 
-  // FIXME: Remove or reduce this restriction. We're in a bit of an odd spot
-  //        since we're (potentially) doing the load out of its normal order
-  //        in the loop and that may throw off dependency checking.
-  //        A forward dependency should be fine, but a backwards dep may not
-  //        be even if LAA thinks it is due to performing the load for the
-  //        vector iteration i+1 in vector iteration i.
-  if (CriticalUncountedExitConditionLoad) {
-    LAI = &LAIs.getInfo(*TheLoop);
-    const MemoryDepChecker &DepChecker = LAI->getDepChecker();
-    const auto *Deps = DepChecker.getDependences();
-    if (!Deps) {
-      reportVectorizationFailure(
-          "Invalid memory dependencies result",
-          "Unable to determine memory dependencies for an early exit loop with "
-          "side effects.",
-          "CantVectorizeInvalidDependencesForEELoopsWithSideEffects", ORE,
-          TheLoop);
-      return false;
-    }
 
-    if (any_of(*Deps, [&](const MemoryDepChecker::Dependence &Dep) {
-          return (Dep.getDestination(DepChecker) ==
-                      CriticalUncountedExitConditionLoad ||
-                  Dep.getSource(DepChecker) ==
-                      CriticalUncountedExitConditionLoad);
-        })) {
-      reportVectorizationFailure(
-          "No dependencies allowed for critical early exit condition load "
-          "in a loop with side effects",
-          "Critical Early exit condition loads in a loop with side effects "
-          "may not have a dependence with another memory operation.",
-          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
-          TheLoop);
-      return false;
-    }
-  }
 
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
@@ -1927,6 +1841,110 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   return true;
 }
 
+bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
+    BasicBlock *ExitingBlock) {
+  SmallVector<const SCEVPredicate *, 4> Predicates;
+  LoadInst *CriticalUncountedExitConditionLoad = nullptr;
+
+  // Try to find a load in the critical path for the uncounted exit condition.
+  // This is currently matching about the simplest form we can, expecting
+  // only one in-loop load, the result of which is directly compared against
+  // a loop-invariant value.
+  // FIXME: We're insisting on a single use for now, because otherwise we will
+  // need to make PHI nodes for other users. That can be done once the initial
+  // transform code lands.
+  if (BranchInst *Br = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) {
+    // FIXME: Don't rely on operand ordering for the comparison.
+    ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+    if (Cmp && Cmp->hasOneUse() &&
+        TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+      LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+      if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
+        // The following call also checks that the load address is either
+        // invariant or is an affine SCEVAddRecExpr with a constant step.
+        // In either case, we're not relying on another load.
+        // FIXME: Support gathers after first-faulting support lands.
+        if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
+                                              AC, &Predicates)) {
+          ICFLoopSafetyInfo SafetyInfo;
+          SafetyInfo.computeLoopSafetyInfo(TheLoop);
+          // We need to know that load will be executed before we can hoist a
+          // copy out to run just before the first iteration.
+          if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
+            CriticalUncountedExitConditionLoad = Load;
+          else
+            reportVectorizationFailure(
+                "Early exit condition load not guaranteed to execute",
+                "Cannot vectorize early exit loop when condition load is not "
+                "guaranteed to execute",
+                "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+        } else {
+          reportVectorizationFailure(
+              "Loop may fault",
+              "Cannot vectorize potentially faulting early exit loop",
+              "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+          return false;
+        }
+      }
+    }
+  } else {
+    reportVectorizationFailure(
+        "Unsupported control flow in early exit loop with side effects",
+        "Cannot find branch instruction for uncounted exit in early exit loop "
+        "with side effects",
+        "UnsupportedUncountedExitTerminator", ORE, TheLoop);
+    return false;
+  }
+
+  if (!CriticalUncountedExitConditionLoad) {
+    reportVectorizationFailure(
+        "Early exit loop with store but no condition load",
+        "Cannot vectorize early exit loop with store but no condition load",
+        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
+  // We're in a bit of an odd spot since we're (potentially) doing the load
+  // out of its normal order in the loop and that may throw off dependency
+  // checking. A forward dependency should be fine, but a backwards dep may not
+  // be even if LAA thinks it is due to performing the load for the vector
+  // iteration i+1 in vector iteration i.
+  // In any case, prohibit vectorization if there are any loop-carried
+  // dependencies on the critical load.
+  // FIXME: Relax this constraint where possible.
+  LAI = &LAIs.getInfo(*TheLoop);
+  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
+  const auto *Deps = DepChecker.getDependences();
+  if (!Deps) {
+    // We may have exceeded the allowed number of dependencies to track, and
+    // given up. Just bail out since we can't be sure.
+    reportVectorizationFailure(
+        "Invalid memory dependencies result",
+        "Unable to determine memory dependencies for an early exit loop with "
+        "side effects.",
+        "CantVectorizeInvalidDependencesForEELoopsWithSideEffects", ORE,
+        TheLoop);
+    return false;
+  }
+
+  if (any_of(*Deps, [&](const MemoryDepChecker::Dependence &Dep) {
+        return (Dep.getDestination(DepChecker) ==
+                    CriticalUncountedExitConditionLoad ||
+                Dep.getSource(DepChecker) ==
+                    CriticalUncountedExitConditionLoad);
+      })) {
+    reportVectorizationFailure(
+        "No dependencies allowed for critical early exit condition load "
+        "in a loop with side effects",
+        "Critical Early exit condition loads in a loop with side effects "
+        "may not have a dependence with another memory operation.",
+        "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
+    return false;
+  }
+
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   // Store the result and return it at the end instead of exiting early, in case
   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.

>From 23770b07e5be55c874ce1c29e64116ac3037fa66 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 12 Aug 2025 14:48:17 +0000
Subject: [PATCH 12/18] Improve comments, simplify condition checking

---
 .../Vectorize/LoopVectorizationLegality.h     | 10 +--
 .../Vectorize/LoopVectorizationLegality.cpp   | 90 +++++++++----------
 .../early_exit_store_legality.ll              | 37 +++++++-
 3 files changed, 83 insertions(+), 54 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 6627684e84a17..2d300081527e6 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -607,14 +607,12 @@ class LoopVectorizationLegality {
   /// This means we must ensure that it is safe to move the load for 'c[i]'
   /// before other memory operations (or any other observable side effects) in
   /// the loop.
+  ///
+  /// Currently, c[i] should only have one user (the comparison used for the
+  /// uncounted exit) since we would otherwise need to introduce a PHI node
+  /// for it.
   bool canUncountedExitConditionLoadBeMoved(BasicBlock *ExitingBlock);
 
-  /// Clears any current early exit data gathered if a check failed.
-  void clearEarlyExitData() {
-    UncountableExitingBB = nullptr;
-    UncountedExitWithSideEffects = false;
-  }
-
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f8f3c7fa2cac7..9fc66a67a0d09 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1764,11 +1764,12 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   };
 
+  bool HasSideEffects = false;
   for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
       if (I.mayWriteToMemory()) {
         if (isa<StoreInst>(&I) && cast<StoreInst>(&I)->isSimple()) {
-          UncountedExitWithSideEffects = true;
+          HasSideEffects = true;
           continue;
         }
 
@@ -1795,7 +1796,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
 
   SmallVector<LoadInst *, 4> NonDerefLoads;
   // TODO: Handle loops that may fault.
-  if (!UncountedExitWithSideEffects) {
+  if (!HasSideEffects) {
     // Read-only loop.
     Predicates.clear();
     if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
@@ -1838,6 +1839,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                        "backedge taken count: "
                     << *SymbolicMaxBTC << '\n');
   UncountableExitingBB = SingleUncountableExitingBlock;
+  UncountedExitWithSideEffects = HasSideEffects;
   return true;
 }
 
@@ -1853,53 +1855,53 @@ bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
   // FIXME: We're insisting on a single use for now, because otherwise we will
   // need to make PHI nodes for other users. That can be done once the initial
   // transform code lands.
-  if (BranchInst *Br = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) {
-    // FIXME: Don't rely on operand ordering for the comparison.
-    ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
-    if (Cmp && Cmp->hasOneUse() &&
-        TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
-      LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
-      if (Load && Load->hasOneUse() && !TheLoop->isLoopInvariant(Load)) {
-        // The following call also checks that the load address is either
-        // invariant or is an affine SCEVAddRecExpr with a constant step.
-        // In either case, we're not relying on another load.
-        // FIXME: Support gathers after first-faulting support lands.
-        if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
-                                              AC, &Predicates)) {
-          ICFLoopSafetyInfo SafetyInfo;
-          SafetyInfo.computeLoopSafetyInfo(TheLoop);
-          // We need to know that load will be executed before we can hoist a
-          // copy out to run just before the first iteration.
-          if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
-            CriticalUncountedExitConditionLoad = Load;
-          else
-            reportVectorizationFailure(
-                "Early exit condition load not guaranteed to execute",
-                "Cannot vectorize early exit loop when condition load is not "
-                "guaranteed to execute",
-                "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
-        } else {
+  auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
+
+  // FIXME: Don't rely on operand ordering for the comparison.
+  ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
+  if (Cmp && Cmp->hasOneUse() && TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
+    LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
+    if (Load && Load->hasOneUse()) {
+      // Make sure that the load address is not loop invariant; we want an
+      // address calculation that we can rotate to the next vector iteration.
+      const SCEV *PtrScev = PSE.getSE()->getSCEV(Load->getPointerOperand());
+      if (PSE.getSE()->isLoopInvariant(PtrScev, TheLoop)) {
+        reportVectorizationFailure(
+            "Uncounted exit condition depends on load from invariant address",
+            "EarlyExitLoadInvariantAddress", ORE, TheLoop);
+        return false;
+      }
+
+      // The following call also checks that the load address is either
+      // invariant (which we've just ruled out) or is an affine SCEVAddRecExpr
+      // with a constant step. In either case, we're not relying on another
+      // load within the loop.
+      // FIXME: Support gathers after first-faulting load support lands.
+      if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
+                                            AC, &Predicates)) {
+        ICFLoopSafetyInfo SafetyInfo;
+        SafetyInfo.computeLoopSafetyInfo(TheLoop);
+        // We need to know that load will be executed before we can hoist a
+        // copy out to run just before the first iteration.
+        if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
+          CriticalUncountedExitConditionLoad = Load;
+        else
           reportVectorizationFailure(
-              "Loop may fault",
-              "Cannot vectorize potentially faulting early exit loop",
-              "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
-          return false;
-        }
+              "Early exit condition load not guaranteed to execute",
+              "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+      } else {
+        reportVectorizationFailure(
+            "Loop may fault",
+            "Cannot vectorize potentially faulting early exit loop",
+            "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+        return false;
       }
     }
-  } else {
-    reportVectorizationFailure(
-        "Unsupported control flow in early exit loop with side effects",
-        "Cannot find branch instruction for uncounted exit in early exit loop "
-        "with side effects",
-        "UnsupportedUncountedExitTerminator", ORE, TheLoop);
-    return false;
   }
 
   if (!CriticalUncountedExitConditionLoad) {
     reportVectorizationFailure(
-        "Early exit loop with store but no condition load",
-        "Cannot vectorize early exit loop with store but no condition load",
+        "Early exit loop with store but no supported condition load",
         "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
     return false;
   }
@@ -1936,8 +1938,6 @@ bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
     reportVectorizationFailure(
         "No dependencies allowed for critical early exit condition load "
         "in a loop with side effects",
-        "Critical Early exit condition loads in a loop with side effects "
-        "may not have a dependence with another memory operation.",
         "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
     return false;
   }
@@ -2014,8 +2014,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     } else {
       if (!isVectorizableEarlyExitLoop()) {
         assert(!hasUncountableEarlyExit() &&
+               !hasUncountedExitWithSideEffects() &&
                "Must be false without vectorizable early-exit loop");
-        clearEarlyExitData();
         if (DoExtraAnalysis)
           Result = false;
         else
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index d938f250c11c4..b46a1e70d158e 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -4,7 +4,7 @@
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
-; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no supported condition load.
 entry:
   %p1 = alloca [1024 x i8]
   call void @init_mem(ptr %p1, i64 1024)
@@ -57,7 +57,7 @@ exit:
 
 define void @loop_contains_store_ee_condition_is_invariant(ptr dereferenceable(40) noalias %array, i16 %ee.val) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_ee_condition_is_invariant'
-; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no supported condition load.
 entry:
   br label %for.body
 
@@ -81,7 +81,7 @@ exit:
 
 define void @loop_contains_store_fcmp_condition(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_fcmp_condition'
-; CHECK:       LV: Not vectorizing: Early exit loop with store but no condition load.
+; CHECK:       LV: Not vectorizing: Early exit loop with store but no supported condition load.
 entry:
   br label %for.body
 
@@ -461,5 +461,36 @@ exit:
   ret void
 }
 
+define void @loop_contains_store_uncounted_exit_is_not_guaranteed_to_execute(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_uncounted_exit_is_not_guaranteed_to_execute'
+; CHECK:       LV: Not vectorizing: Early exit is not the latch predecessor.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %rem = urem i64 %iv, 5
+  %skip.ee.cmp = icmp eq i64 %rem, 0
+  br i1 %skip.ee.cmp, label %for.inc, label %ee.block
+
+ee.block:
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare void @init_mem(ptr, i64);
 declare i64 @get_an_unknown_offset();

>From 9c5436a7658b18ed32ff1fb2a5a7448ea484aaf3 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 20 Aug 2025 15:42:35 +0000
Subject: [PATCH 13/18] Use AA directly instead of through dependencies

---
 .../llvm/Analysis/LoopAccessAnalysis.h        |   2 +
 .../Vectorize/LoopVectorizationLegality.h     |  20 ++--
 .../Vectorize/LoopVectorizationLegality.cpp   | 102 +++++++++---------
 .../early_exit_store_legality.ll              |  31 +++++-
 4 files changed, 87 insertions(+), 68 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 52ab38583d5de..964fc2523d8a1 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -973,6 +973,8 @@ class LoopAccessInfoManager {
 
   LLVM_ABI void clear();
 
+  LLVM_ABI AAResults *getAAResults() { return &AA; }
+
   LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA,
                            FunctionAnalysisManager::Invalidator &Inv);
 };
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 2d300081527e6..5f0aaad8badbb 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -409,10 +409,10 @@ class LoopVectorizationLegality {
 
   /// Returns true if this is an early exit loop with state-changing or
   /// potentially-faulting operations and the IR representing the condition
-  /// for the uncounted exit must be determined before any of the state changes
-  /// or potentially faulting operations take place.
-  bool hasUncountedExitWithSideEffects() const {
-    return UncountedExitWithSideEffects;
+  /// for the uncountable exit must be determined before any of the state
+  /// changes or potentially faulting operations take place.
+  bool hasUncountableExitWithSideEffects() const {
+    return UncountableExitWithSideEffects;
   }
 
   /// Return true if there is store-load forwarding dependencies.
@@ -596,7 +596,7 @@ class LoopVectorizationLegality {
   ///   %cmp.next = icmp eq <4 x i32> %ci.next, zeroinitializer
   ///   %any.of.next = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.next)
   ///   iv.next compared with shortened vector tripcount...
-  ///   uncounted condition combined with counted condition...
+  ///   uncountable condition combined with counted condition...
   ///   br...
   ///
   /// Doing this means the last few iterations will always be performed by a
@@ -608,10 +608,10 @@ class LoopVectorizationLegality {
   /// before other memory operations (or any other observable side effects) in
   /// the loop.
   ///
-  /// Currently, c[i] should only have one user (the comparison used for the
-  /// uncounted exit) since we would otherwise need to introduce a PHI node
+  /// Currently, c[i] must have only one user (the comparison used for the
+  /// uncountable exit) since we would otherwise need to introduce a PHI node
   /// for it.
-  bool canUncountedExitConditionLoadBeMoved(BasicBlock *ExitingBlock);
+  bool canUncountableExitConditionLoadBeMoved(BasicBlock *ExitingBlock);
 
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
@@ -735,9 +735,9 @@ class LoopVectorizationLegality {
   /// exit.
   BasicBlock *UncountableExitingBB = nullptr;
 
-  /// If true, the loop has at least one uncounted exit and operations within
+  /// If true, the loop has at least one uncountable exit and operations within
   /// the loop may have observable side effects.
-  bool UncountedExitWithSideEffects = false;
+  bool UncountableExitWithSideEffects = false;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 9fc66a67a0d09..5f365e74a59d6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -15,6 +15,7 @@
 //
 
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MustExecute.h"
@@ -1225,14 +1226,17 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   }
 
   if (!LAI->canVectorizeMemory()) {
-    if (!hasUncountedExitWithSideEffects())
-      return canVectorizeIndirectUnsafeDependences();
-    reportVectorizationFailure(
-        "Cannot vectorize unsafe dependencies in early exit loop with "
-        "side effects.",
-        "Unable to vectorize memory in an early exit loop with side effects",
-        "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
-    return false;
+    if (hasUncountableExitWithSideEffects()) {
+      reportVectorizationFailure(
+          "Cannot vectorize unsafe dependencies in early exit loop with "
+          "side effects.",
+          "Unable to vectorize memory in an early exit loop with side effects",
+          "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
+          TheLoop);
+      return false;
+    }
+
+    return canVectorizeIndirectUnsafeDependences();
   }
 
   if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
@@ -1806,7 +1810,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                                  "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
       return false;
     }
-  } else if (!canUncountedExitConditionLoadBeMoved(
+  } else if (!canUncountableExitConditionLoadBeMoved(
                  SingleUncountableExitingBlock))
     return false;
 
@@ -1839,16 +1843,15 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                        "backedge taken count: "
                     << *SymbolicMaxBTC << '\n');
   UncountableExitingBB = SingleUncountableExitingBlock;
-  UncountedExitWithSideEffects = HasSideEffects;
+  UncountableExitWithSideEffects = HasSideEffects;
   return true;
 }
 
-bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
+bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
     BasicBlock *ExitingBlock) {
-  SmallVector<const SCEVPredicate *, 4> Predicates;
-  LoadInst *CriticalUncountedExitConditionLoad = nullptr;
+  LoadInst *CriticalUncountableExitConditionLoad = nullptr;
 
-  // Try to find a load in the critical path for the uncounted exit condition.
+  // Try to find a load in the critical path for the uncountable exit condition.
   // This is currently matching about the simplest form we can, expecting
   // only one in-loop load, the result of which is directly compared against
   // a loop-invariant value.
@@ -1867,7 +1870,7 @@ bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
       const SCEV *PtrScev = PSE.getSE()->getSCEV(Load->getPointerOperand());
       if (PSE.getSE()->isLoopInvariant(PtrScev, TheLoop)) {
         reportVectorizationFailure(
-            "Uncounted exit condition depends on load from invariant address",
+            "Uncountable exit condition depends on load from invariant address",
             "EarlyExitLoadInvariantAddress", ORE, TheLoop);
         return false;
       }
@@ -1877,6 +1880,7 @@ bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
       // with a constant step. In either case, we're not relying on another
       // load within the loop.
       // FIXME: Support gathers after first-faulting load support lands.
+      SmallVector<const SCEVPredicate *, 4> Predicates;
       if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
                                             AC, &Predicates)) {
         ICFLoopSafetyInfo SafetyInfo;
@@ -1884,11 +1888,13 @@ bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
         // We need to know that load will be executed before we can hoist a
         // copy out to run just before the first iteration.
         if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
-          CriticalUncountedExitConditionLoad = Load;
-        else
+          CriticalUncountableExitConditionLoad = Load;
+        else {
           reportVectorizationFailure(
               "Early exit condition load not guaranteed to execute",
               "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+          return false;
+        }
       } else {
         reportVectorizationFailure(
             "Loop may fault",
@@ -1899,47 +1905,37 @@ bool LoopVectorizationLegality::canUncountedExitConditionLoadBeMoved(
     }
   }
 
-  if (!CriticalUncountedExitConditionLoad) {
+  if (!CriticalUncountableExitConditionLoad) {
     reportVectorizationFailure(
         "Early exit loop with store but no supported condition load",
         "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
     return false;
   }
 
-  // We're in a bit of an odd spot since we're (potentially) doing the load
-  // out of its normal order in the loop and that may throw off dependency
-  // checking. A forward dependency should be fine, but a backwards dep may not
-  // be even if LAA thinks it is due to performing the load for the vector
-  // iteration i+1 in vector iteration i.
-  // In any case, prohibit vectorization if there are any loop-carried
-  // dependencies on the critical load.
+  // Prohibit any potential aliasing with any instruction in the loop which
+  // might store to memory.
   // FIXME: Relax this constraint where possible.
-  LAI = &LAIs.getInfo(*TheLoop);
-  const MemoryDepChecker &DepChecker = LAI->getDepChecker();
-  const auto *Deps = DepChecker.getDependences();
-  if (!Deps) {
-    // We may have exceeded the allowed number of dependencies to track, and
-    // given up. Just bail out since we can't be sure.
-    reportVectorizationFailure(
-        "Invalid memory dependencies result",
-        "Unable to determine memory dependencies for an early exit loop with "
-        "side effects.",
-        "CantVectorizeInvalidDependencesForEELoopsWithSideEffects", ORE,
-        TheLoop);
-    return false;
-  }
+  AAResults *AA = LAIs.getAAResults();
+  Value *Ptr = CriticalUncountableExitConditionLoad->getPointerOperand();
+  for (auto *BB : TheLoop->blocks()) {
+    for (auto &I : *BB) {
+      if (&I == CriticalUncountableExitConditionLoad)
+        continue;
 
-  if (any_of(*Deps, [&](const MemoryDepChecker::Dependence &Dep) {
-        return (Dep.getDestination(DepChecker) ==
-                    CriticalUncountedExitConditionLoad ||
-                Dep.getSource(DepChecker) ==
-                    CriticalUncountedExitConditionLoad);
-      })) {
-    reportVectorizationFailure(
-        "No dependencies allowed for critical early exit condition load "
-        "in a loop with side effects",
-        "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE, TheLoop);
-    return false;
+      if (I.mayWriteToMemory()) {
+        if (auto *SI = dyn_cast<StoreInst>(&I)) {
+          AliasResult AR = AA->alias(Ptr, SI->getPointerOperand());
+          if (AR == AliasResult::NoAlias)
+            continue;
+        }
+
+        reportVectorizationFailure(
+            "Cannot determine whether critical uncountable exit load address "
+            "does not alias with a memory write",
+            "CantVectorizeAliasWithCriticalUncountableExitLoad", ORE, TheLoop);
+        return false;
+      }
+    }
   }
 
   return true;
@@ -2014,7 +2010,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
     } else {
       if (!isVectorizableEarlyExitLoop()) {
         assert(!hasUncountableEarlyExit() &&
-               !hasUncountedExitWithSideEffects() &&
+               !hasUncountableExitWithSideEffects() &&
                "Must be false without vectorizable early-exit loop");
         if (DoExtraAnalysis)
           Result = false;
@@ -2033,8 +2029,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  // Bail out for state-changing EE loops for now.
-  if (UncountedExitWithSideEffects) {
+  // Bail out for state-changing loops with uncountable exits for now.
+  if (UncountableExitWithSideEffects) {
     reportVectorizationFailure(
         "Writes to memory unsupported in early exit loops",
         "Cannot vectorize early exit loop with writes to memory",
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index b46a1e70d158e..38e822532c535 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -1,6 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s --check-prefixes=CHECK,NRMDEPS
-; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 -max-dependences=1 | FileCheck %s --check-prefixes=CHECK,MAXDEP1
+; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 -disable-output 2>&1 | FileCheck %s
 
 define i64 @loop_contains_store(ptr %dest) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store'
@@ -107,8 +106,7 @@ exit:
 
 define void @loop_contains_store_safe_dependency(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(96) %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_safe_dependency'
-; NRMDEPS:     LV: Not vectorizing: No dependencies allowed for critical early exit condition load in a loop with side effects.
-; MAXDEP1:     LV: Not vectorizing: Invalid memory dependencies result.
+; CHECK:       LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
 entry:
   %pred.plus.8 = getelementptr inbounds nuw i16, ptr %pred, i64 8
   br label %for.body
@@ -329,7 +327,7 @@ exit:
 
 define void @loop_contains_store_requiring_alias_check(ptr dereferenceable(40) %array, ptr align 2 dereferenceable(40) %pred) {
 ; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store_requiring_alias_check'
-; CHECK:       LV: Not vectorizing: Writes to memory unsupported in early exit loops.
+; CHECK:       LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
 entry:
   br label %for.body
 
@@ -492,5 +490,28 @@ exit:
   ret void
 }
 
+define void @test_nodep(ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'test_nodep'
+; CHECK:       LV: Not vectorizing: Cannot determine whether critical uncountable exit load address does not alias with a memory write.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  store i16 0, ptr %st.addr, align 2
+  %ee.val = load i16, ptr %st.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare void @init_mem(ptr, i64);
 declare i64 @get_an_unknown_offset();

>From 022f3e64fcfda18aedcf754e8717eafdf5f05d8f Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Thu, 21 Aug 2025 10:50:47 +0000
Subject: [PATCH 14/18] Flatten exec guarantee check a bit

---
 .../Vectorize/LoopVectorizationLegality.cpp   | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 5f365e74a59d6..a1fd9a40a0da7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1881,27 +1881,27 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
       // load within the loop.
       // FIXME: Support gathers after first-faulting load support lands.
       SmallVector<const SCEVPredicate *, 4> Predicates;
-      if (isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
-                                            AC, &Predicates)) {
-        ICFLoopSafetyInfo SafetyInfo;
-        SafetyInfo.computeLoopSafetyInfo(TheLoop);
-        // We need to know that load will be executed before we can hoist a
-        // copy out to run just before the first iteration.
-        if (SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop))
-          CriticalUncountableExitConditionLoad = Load;
-        else {
-          reportVectorizationFailure(
-              "Early exit condition load not guaranteed to execute",
-              "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
-          return false;
-        }
-      } else {
+      if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
+                                             AC, &Predicates)) {
         reportVectorizationFailure(
             "Loop may fault",
             "Cannot vectorize potentially faulting early exit loop",
             "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
         return false;
       }
+
+      ICFLoopSafetyInfo SafetyInfo;
+      SafetyInfo.computeLoopSafetyInfo(TheLoop);
+      // We need to know that load will be executed before we can hoist a
+      // copy out to run just before the first iteration.
+      if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+        reportVectorizationFailure(
+            "Early exit condition load not guaranteed to execute",
+            "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+        return false;
+      }
+
+      CriticalUncountableExitConditionLoad = Load;
     }
   }
 

>From 21a568276599c82c8fc6592fb960a17d6bbafcbf Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 26 Aug 2025 10:26:31 +0000
Subject: [PATCH 15/18] Flatten condition IR detection

---
 .../Vectorize/LoopVectorizationLegality.cpp   | 95 +++++++++++--------
 .../early_exit_store_legality.ll              | 54 +++++++++++
 2 files changed, 108 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index a1fd9a40a0da7..c0561e0fca62e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1228,9 +1228,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   if (!LAI->canVectorizeMemory()) {
     if (hasUncountableExitWithSideEffects()) {
       reportVectorizationFailure(
-          "Cannot vectorize unsafe dependencies in early exit loop with "
-          "side effects.",
-          "Unable to vectorize memory in an early exit loop with side effects",
+          "Cannot vectorize unsafe dependencies in uncountable exit loop with "
+          "side effects",
           "CantVectorizeUnsafeDependencyForEELoopWithSideEffects", ORE,
           TheLoop);
       return false;
@@ -1860,49 +1859,63 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
   // transform code lands.
   auto *Br = cast<BranchInst>(ExitingBlock->getTerminator());
 
+  using namespace llvm::PatternMatch;
+  Value *L = nullptr;
+  Value *R = nullptr;
+  if (!match(Br->getCondition(),
+             m_OneUse(m_ICmp(m_OneUse(m_Value(L)), (m_Value(R)))))) {
+    reportVectorizationFailure(
+        "Early exit loop with store but no supported condition load",
+        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
+
   // FIXME: Don't rely on operand ordering for the comparison.
-  ICmpInst *Cmp = dyn_cast<ICmpInst>(Br->getCondition());
-  if (Cmp && Cmp->hasOneUse() && TheLoop->isLoopInvariant(Cmp->getOperand(1))) {
-    LoadInst *Load = dyn_cast<LoadInst>(Cmp->getOperand(0));
-    if (Load && Load->hasOneUse()) {
-      // Make sure that the load address is not loop invariant; we want an
-      // address calculation that we can rotate to the next vector iteration.
-      const SCEV *PtrScev = PSE.getSE()->getSCEV(Load->getPointerOperand());
-      if (PSE.getSE()->isLoopInvariant(PtrScev, TheLoop)) {
-        reportVectorizationFailure(
-            "Uncountable exit condition depends on load from invariant address",
-            "EarlyExitLoadInvariantAddress", ORE, TheLoop);
-        return false;
-      }
+  if (!TheLoop->isLoopInvariant(R)) {
+    reportVectorizationFailure(
+        "Early exit loop with store but no supported condition load",
+        "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
+    return false;
+  }
 
-      // The following call also checks that the load address is either
-      // invariant (which we've just ruled out) or is an affine SCEVAddRecExpr
-      // with a constant step. In either case, we're not relying on another
-      // load within the loop.
-      // FIXME: Support gathers after first-faulting load support lands.
-      SmallVector<const SCEVPredicate *, 4> Predicates;
-      if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT,
-                                             AC, &Predicates)) {
-        reportVectorizationFailure(
-            "Loop may fault",
-            "Cannot vectorize potentially faulting early exit loop",
-            "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
-        return false;
-      }
+  if (auto *Load = dyn_cast<LoadInst>(L)) {
+    // Make sure that the load address is not loop invariant; we want an
+    // address calculation that we can rotate to the next vector iteration.
+    const SCEV *PtrScev = PSE.getSE()->getSCEV(Load->getPointerOperand());
+    if (PSE.getSE()->isLoopInvariant(PtrScev, TheLoop)) {
+      reportVectorizationFailure(
+          "Uncountable exit condition depends on load from invariant address",
+          "EarlyExitLoadInvariantAddress", ORE, TheLoop);
+      return false;
+    }
 
-      ICFLoopSafetyInfo SafetyInfo;
-      SafetyInfo.computeLoopSafetyInfo(TheLoop);
-      // We need to know that load will be executed before we can hoist a
-      // copy out to run just before the first iteration.
-      if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
-        reportVectorizationFailure(
-            "Early exit condition load not guaranteed to execute",
-            "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
-        return false;
-      }
+    // The following call also checks that the load address is either
+    // invariant (which we've just ruled out) or is an affine SCEVAddRecExpr
+    // with a constant step. In either case, we're not relying on another
+    // load within the loop.
+    // FIXME: Support gathers after first-faulting load support lands.
+    SmallVector<const SCEVPredicate *, 4> Predicates;
+    if (!isDereferenceableAndAlignedInLoop(Load, TheLoop, *PSE.getSE(), *DT, AC,
+                                           &Predicates)) {
+      reportVectorizationFailure(
+          "Loop may fault",
+          "Cannot vectorize potentially faulting early exit loop",
+          "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
 
-      CriticalUncountableExitConditionLoad = Load;
+    ICFLoopSafetyInfo SafetyInfo;
+    SafetyInfo.computeLoopSafetyInfo(TheLoop);
+    // We need to know that load will be executed before we can hoist a
+    // copy out to run just before the first iteration.
+    if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
+      reportVectorizationFailure(
+          "Early exit condition load not guaranteed to execute",
+          "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
+      return false;
     }
+
+    CriticalUncountableExitConditionLoad = Load;
   }
 
   if (!CriticalUncountableExitConditionLoad) {
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 38e822532c535..48d751b126c95 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -513,5 +513,59 @@ exit:
   ret void
 }
 
+define void @histogram_with_uncountable_exit(ptr noalias %buckets, ptr readonly %indices, ptr align 2 dereferenceable(40) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'histogram_with_uncountable_exit'
+; CHECK:       LV: Not vectorizing: Cannot vectorize unsafe dependencies in uncountable exit loop with side effects.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %gep.indices = getelementptr inbounds i32, ptr %indices, i64 %iv
+  %l.idx = load i32, ptr %gep.indices, align 4
+  %idxprom1 = zext i32 %l.idx to i64
+  %gep.bucket = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
+  %l.bucket = load i32, ptr %gep.bucket, align 4
+  %inc = add nsw i32 %l.bucket, 1
+  store i32 %inc, ptr %gep.bucket, align 4
+  %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv
+  %ee.val = load i16, ptr %ee.addr, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+define void @uncountable_exit_condition_address_is_invariant(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(2) readonly %pred) {
+; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_condition_address_is_invariant'
+; CHECK:       LV: Not vectorizing: Uncountable exit condition depends on load from invariant address.
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
+  %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv
+  %data = load i16, ptr %st.addr, align 2
+  %inc = add nsw i16 %data, 1
+  store i16 %inc, ptr %st.addr, align 2
+  %ee.val = load i16, ptr %pred, align 2
+  %ee.cond = icmp sgt i16 %ee.val, 500
+  br i1 %ee.cond, label %exit, label %for.inc
+
+for.inc:
+  %iv.next = add nuw nsw i64 %iv, 1
+  %counted.cond = icmp eq i64 %iv.next, 20
+  br i1 %counted.cond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
 declare void @init_mem(ptr, i64);
 declare i64 @get_an_unknown_offset();

>From e80821ee3c969513859ebd0fb91cc6e1fd2ec252 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 27 Aug 2025 10:26:00 +0000
Subject: [PATCH 16/18] Assert for execute guarantee check for now

---
 .../Vectorize/LoopVectorizationLegality.cpp           | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index c0561e0fca62e..db64ace6a4a54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1908,12 +1908,11 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
     SafetyInfo.computeLoopSafetyInfo(TheLoop);
     // We need to know that load will be executed before we can hoist a
     // copy out to run just before the first iteration.
-    if (!SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop)) {
-      reportVectorizationFailure(
-          "Early exit condition load not guaranteed to execute",
-          "EarlyExitLoadNotGuaranteed", ORE, TheLoop);
-      return false;
-    }
+    // FIXME: Currently, other restrictions prevent us from reaching this point
+    //        with a loop where the uncountable exit condition is determined
+    //        by a conditional load.
+    assert(SafetyInfo.isGuaranteedToExecute(*Load, DT, TheLoop) &&
+           "Unhandled control flow in uncountable exit loop with side effects");
 
     CriticalUncountableExitConditionLoad = Load;
   }

>From 2233dcf9fb7466fdc09636ae880310842d0a2670 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Mon, 1 Sep 2025 11:48:15 +0000
Subject: [PATCH 17/18] Obtain AA via LoopVectorize

---
 .../llvm/Analysis/LoopAccessAnalysis.h        |  2 --
 .../Vectorize/LoopVectorizationLegality.h     | 23 ++++++++++++-------
 .../llvm/Transforms/Vectorize/LoopVectorize.h |  1 +
 .../Vectorize/LoopVectorizationLegality.cpp   |  3 ---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  3 ++-
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 964fc2523d8a1..52ab38583d5de 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -973,8 +973,6 @@ class LoopAccessInfoManager {
 
   LLVM_ABI void clear();
 
-  LLVM_ABI AAResults *getAAResults() { return &AA; }
-
   LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA,
                            FunctionAnalysisManager::Invalidator &Inv);
 };
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 5f0aaad8badbb..69770e9e2b940 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -251,15 +251,18 @@ struct HistogramInfo {
 /// induction variable and the different reduction variables.
 class LoopVectorizationLegality {
 public:
-  LoopVectorizationLegality(
-      Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
-      TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F,
-      LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE,
-      LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
-      AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+  LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
+                            DominatorTree *DT, TargetTransformInfo *TTI,
+                            TargetLibraryInfo *TLI, Function *F,
+                            LoopAccessInfoManager &LAIs, LoopInfo *LI,
+                            OptimizationRemarkEmitter *ORE,
+                            LoopVectorizationRequirements *R,
+                            LoopVectorizeHints *H, DemandedBits *DB,
+                            AssumptionCache *AC, BlockFrequencyInfo *BFI,
+                            ProfileSummaryInfo *PSI, AAResults *AA)
       : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
-        ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI),
-        PSI(PSI) {}
+        ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI),
+        AA(AA) {}
 
   /// ReductionList contains the reduction descriptors for all
   /// of the reductions that were found in the loop.
@@ -721,6 +724,10 @@ class LoopVectorizationLegality {
   BlockFrequencyInfo *BFI;
   ProfileSummaryInfo *PSI;
 
+  /// Alias analysis results, used to check for possible aliasing with loads
+  /// used in uncountable exit conditions.
+  AAResults *AA;
+
   /// If we discover function calls within the loop which have a valid
   /// vectorized variant, record that fact so that LoopVectorize can
   /// (potentially) make a better decision on the maximum VF and enable
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index db1971aca4bff..bdc2a0dad8622 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -152,6 +152,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
   LoopAccessInfoManager *LAIs;
   OptimizationRemarkEmitter *ORE;
   ProfileSummaryInfo *PSI;
+  AAResults *AA;
 
   LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
   LLVM_ABI void
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index db64ace6a4a54..383afeaee2350 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1830,8 +1830,6 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
                       << "\n");
   }
 
-
-
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
   // Since we have an exact exit count for the latch and the early exit
@@ -1927,7 +1925,6 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
   // Prohibit any potential aliasing with any instruction in the loop which
   // might store to memory.
   // FIXME: Relax this constraint where possible.
-  AAResults *AA = LAIs.getAAResults();
   Value *Ptr = CriticalUncountableExitConditionLoad->getPointerOperand();
   for (auto *BB : TheLoop->blocks()) {
     for (auto &I : *BB) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dd4b3f8e3077b..10e056b14baac 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9811,7 +9811,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   // Check if it is legal to vectorize the loop.
   LoopVectorizationRequirements Requirements;
   LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE,
-                                &Requirements, &Hints, DB, AC, BFI, PSI);
+                                &Requirements, &Hints, DB, AC, BFI, PSI, AA);
   if (!LVL.canVectorize(EnableVPlanNativePath)) {
     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
     Hints.emitRemarkWithHints();
@@ -10248,6 +10248,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
   DB = &AM.getResult<DemandedBitsAnalysis>(F);
   ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   LAIs = &AM.getResult<LoopAccessAnalysis>(F);
+  AA = &AM.getResult<AAManager>(F);
 
   auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
   PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());

>From 83e10d9949b8899f11059f048caf6e614ac834a6 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 9 Sep 2025 11:01:33 +0000
Subject: [PATCH 18/18] formatting fixup after rebase

---
 .../Transforms/Vectorize/LoopVectorizationLegality.cpp    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 383afeaee2350..bb4c009b3dd47 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1803,10 +1803,10 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     // Read-only loop.
     Predicates.clear();
     if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
-                      &Predicates)) {
-      reportVectorizationFailure("Loop may fault",
-                                 "Cannot vectorize non-read-only early exit loop",
-                                 "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
+                        &Predicates)) {
+      reportVectorizationFailure(
+          "Loop may fault", "Cannot vectorize non-read-only early exit loop",
+          "NonReadOnlyEarlyExitLoop", ORE, TheLoop);
       return false;
     }
   } else if (!canUncountableExitConditionLoadBeMoved(



More information about the llvm-commits mailing list