[llvm] [LV] Provide utility routine to find uncounted exit recipes (PR #152530)

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 16 07:16:39 PDT 2025


https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/152530

>From 7d755295dd33c6ac01675bb4f7e2dae6ea3c80d0 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 30 Jul 2025 09:23:13 +0000
Subject: [PATCH 1/4] WIP

---
 .../Transforms/Vectorize/VPlanPatternMatch.h  |  31 ++++++
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp  | 100 ++++++++++++++++++
 llvm/lib/Transforms/Vectorize/VPlanUtils.h    |   8 ++
 llvm/lib/Transforms/Vectorize/VPlanValue.h    |   2 +
 .../Transforms/Vectorize/CMakeLists.txt       |   1 +
 .../Vectorize/VPlanUncountedExitTest.cpp      |  99 +++++++++++++++++
 6 files changed, 241 insertions(+)
 create mode 100644 llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 401a2cbd9a5ca..1663aedb62c8e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -703,6 +703,37 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
   return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
 }
 
+struct loop_invariant_vpvalue {
+  template <typename ITy> bool match(ITy *V) const {
+    VPValue *Val = dyn_cast<VPValue>(V);
+    return Val && Val->isDefinedOutsideLoopRegions();
+  }
+};
+
+inline loop_invariant_vpvalue m_LoopInvVPValue() {
+  return loop_invariant_vpvalue();
+}
+
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
+template <typename SubPattern_t> struct OneUse_match {
+  SubPattern_t SubPattern;
+
+  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    return V->hasOneUse() && SubPattern.match(V);
+  }
+};
+
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
+  return SubPattern;
+}
+
 } // namespace VPlanPatternMatch
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index ddc4ad1977401..b322b5b44b230 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -141,3 +141,103 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
   });
   return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
 }
+
+std::optional<VPValue *> vputils::getRecipesForUncountedExit(
+    VPlan &Plan, SmallVectorImpl<VPRecipeBase *> &Recipes,
+    SmallVectorImpl<VPReplicateRecipe *> &GEPs) {
+  using namespace llvm::VPlanPatternMatch;
+  // Given a vplan like the following (just including the recipes contributing
+  // to loop control exiting here, not the actual work), we're looking to match
+  // the recipes contributing to the uncounted exit condition comparison
+  // (here, vp<%4>) back to the canonical induction for the vector body so that
+  // we can copy them to a preheader and rotate the address in the loop to the
+  // next vector iteration.
+  //
+  // VPlan ' for UF>=1' {
+  // Live-in vp<%0> = VF
+  // Live-in ir<64> = original trip-count
+  //
+  // entry:
+  // Successor(s): preheader, vector.ph
+  //
+  // vector.ph:
+  // Successor(s): vector loop
+  //
+  // <x1> vector loop: {
+  //   vector.body:
+  //     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
+  //     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
+  //     CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
+  //     WIDEN ir<%ee.load> = load ir<%ee.addr>
+  //     WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
+  //     EMIT vp<%5> = any-of vp<%4>
+  //     EMIT vp<%6> = add vp<%2>, vp<%0>
+  //     EMIT vp<%7> = icmp eq vp<%6>, ir<64>
+  //     EMIT vp<%8> = or vp<%5>, vp<%7>
+  //     EMIT branch-on-cond vp<%8>
+  //   No successors
+  // }
+  // Successor(s): middle.block
+  //
+  // middle.block:
+  // Successor(s): preheader
+  //
+  // preheader:
+  // No successors
+  // }
+
+  // Find the uncounted loop exit condition.
+  auto *Region = Plan.getVectorLoopRegion();
+  VPValue *UncountedCondition = nullptr;
+  if (!match(
+          Region->getExitingBasicBlock()->getTerminator(),
+          m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+              m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue())))))
+    return std::nullopt;
+
+  SmallVector<VPValue *, 4> Worklist;
+  bool LoadFound = false;
+  Worklist.push_back(UncountedCondition);
+  while (!Worklist.empty()) {
+    VPValue *V = Worklist.pop_back_val();
+
+    if (V->isDefinedOutsideLoopRegions())
+      continue;
+    if (V->getNumUsers() > 1)
+      return std::nullopt;
+
+    if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
+      if (Cmp->getOpcode() != Instruction::ICmp)
+        return std::nullopt;
+      Worklist.push_back(Cmp->getOperand(0));
+      Worklist.push_back(Cmp->getOperand(1));
+      Recipes.push_back(Cmp);
+    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
+      if (!Load->isConsecutive() || Load->isMasked())
+        return std::nullopt;
+      Worklist.push_back(Load->getAddr());
+      Recipes.push_back(Load);
+      LoadFound = true;
+    } else if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(V)) {
+      Worklist.push_back(VecPtr->getOperand(0));
+      Recipes.push_back(VecPtr);
+    } else if (auto *GEP = dyn_cast<VPReplicateRecipe>(V)) {
+      if (GEP->getNumOperands() != 2)
+        return std::nullopt;
+      if (!match(GEP, m_GetElementPtr(
+                          m_LoopInvVPValue(),
+                          m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
+                                          m_SpecificInt(1),
+                                          m_Specific(&Plan.getVF())))))
+        return std::nullopt;
+      GEPs.push_back(GEP);
+      Recipes.push_back(GEP);
+    } else
+      return std::nullopt;
+  }
+
+  if (GEPs.empty() || !LoadFound)
+    return std::nullopt;
+
+  return UncountedCondition;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 77c099b271717..503f599586489 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -101,6 +101,14 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
 /// Returns the header block of the first, top-level loop, or null if none
 /// exist.
 VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
+
+/// Returns the VPValue representing the uncounted exit comparison if all the
+/// recipes needed to form the condition within the vector loop body were
+/// matched.
+std::optional<VPValue *>
+getRecipesForUncountedExit(VPlan &Plan,
+                           SmallVectorImpl<VPRecipeBase *> &Recipes,
+                           SmallVectorImpl<VPReplicateRecipe *> &GEPs);
 } // namespace vputils
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 85c6c2c8d7965..0678bc90ef4b5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -148,6 +148,8 @@ class LLVM_ABI_FOR_TEST VPValue {
     return Current != user_end();
   }
 
+  bool hasOneUse() const { return getNumUsers() == 1; }
+
   void replaceAllUsesWith(VPValue *New);
 
   /// Go through the uses list for this VPValue and make each use point to \p
diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
index 53eeff28c185f..a7254922af007 100644
--- a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -14,5 +14,6 @@ add_llvm_unittest(VectorizeTests
   VPlanHCFGTest.cpp
   VPlanPatternMatchTest.cpp
   VPlanSlpTest.cpp
+  VPlanUncountedExitTest.cpp
   VPlanVerifierTest.cpp
   )
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
new file mode 100644
index 0000000000000..81ef67a0fb923
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
@@ -0,0 +1,99 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp -----===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/LoopVectorizationPlanner.h"
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanPatternMatch.h"
+#include "../lib/Transforms/Vectorize/VPlanUtils.h"
+#include "VPlanTestBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+namespace {
+class VPUncountedExitTest : public VPlanTestBase {};
+
+TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
+  // Create CFG skeleton.
+  VPlan &Plan = getPlan();
+  VPBasicBlock *ScalarPH = Plan.getEntry();
+  VPBasicBlock *Entry = Plan.createVPBasicBlock("entry");
+  Plan.setEntry(Entry);
+  VPBasicBlock *VectorPH = Plan.createVPBasicBlock("vector.ph");
+  VPBasicBlock *VecBody = Plan.createVPBasicBlock("vector.body");
+  VPRegionBlock *Region =
+      Plan.createVPRegionBlock(VecBody, VecBody, "vector loop");
+  VPBasicBlock *MiddleBlock = Plan.createVPBasicBlock("middle.block");
+  VPBlockUtils::connectBlocks(Entry, ScalarPH);
+  VPBlockUtils::connectBlocks(Entry, VectorPH);
+  VPBlockUtils::connectBlocks(VectorPH, Region);
+  VPBlockUtils::connectBlocks(Region, MiddleBlock);
+  VPBlockUtils::connectBlocks(MiddleBlock, ScalarPH);
+
+  // Live-Ins
+  IntegerType *I64Ty = IntegerType::get(C, 64);
+  IntegerType *I32Ty = IntegerType::get(C, 32);
+  PointerType *PTy = PointerType::get(C, 0);
+  VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
+  VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
+  VPValue *VF = &Plan.getVF();
+  Plan.setTripCount(Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 64)));
+
+  // Populate vector.body with the recipes for exiting.
+  auto *IV = new VPCanonicalIVPHIRecipe(Zero, {});
+  VecBody->appendRecipe(IV);
+  VPBuilder Builder(VecBody, VecBody->getFirstNonPhi());
+  auto *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, IV, Inc,
+                                            VF, DebugLoc());
+
+  // Uncounted Exit; GEP -> Load -> Cmp
+  auto *DummyGEP = GetElementPtrInst::Create(I32Ty, Zero->getUnderlyingValue(),
+                                             {}, Twine("ee.addr"));
+  auto *GEP = new VPReplicateRecipe(DummyGEP, {Zero, Steps}, true, nullptr);
+  Builder.insert(GEP);
+  auto *DummyLoad =
+      new LoadInst(I32Ty, PoisonValue::get(PTy), "ee.load", false, Align(1));
+  VPValue *Load =
+      new VPWidenLoadRecipe(*DummyLoad, GEP, nullptr, true, false, {}, {});
+  Builder.insert(Load->getDefiningRecipe());
+  // Should really splat the zero, but we're not checking types here.
+  VPValue *Cmp = new VPWidenRecipe(Instruction::ICmp, {Load, Zero},
+                                   VPIRFlags(CmpInst::ICMP_EQ), {}, {});
+  Builder.insert(Cmp->getDefiningRecipe());
+  VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, Cmp);
+
+  // Counted Exit; Inc IV -> Cmp
+  VPValue *NextIV = Builder.createNaryOp(Instruction::Add, {IV, VF});
+  VPValue *Counted =
+      Builder.createICmp(CmpInst::ICMP_EQ, NextIV, Plan.getTripCount());
+
+  // Combine, and branch.
+  VPValue *Combined = Builder.createNaryOp(Instruction::Or, {AnyOf, Counted});
+  Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
+
+  SmallVector<VPRecipeBase *, 8> Recipes;
+  SmallVector<VPReplicateRecipe *, 2> GEPs;
+
+  std::optional<VPValue *> UncountedCondition =
+      vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
+  ASSERT_TRUE(UncountedCondition.has_value());
+  ASSERT_EQ(*UncountedCondition, Cmp);
+  ASSERT_EQ(GEPs.size(), 1ull);
+  ASSERT_EQ(GEPs[0], GEP);
+  ASSERT_EQ(Recipes.size(), 3ull);
+
+  delete DummyLoad;
+  delete DummyGEP;
+}
+
+} // namespace
+} // namespace llvm

>From 688eadb9c8db9bf9bc7d11f729653761f9179af3 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 12 Aug 2025 10:39:05 +0000
Subject: [PATCH 2/4] Improve comments, be stricter about matching inputs to
 loads

---
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp  | 55 +++++++++++--------
 llvm/lib/Transforms/Vectorize/VPlanUtils.h    | 10 ++--
 .../Vectorize/VPlanUncountedExitTest.cpp      |  2 +-
 3 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index b322b5b44b230..4aee6ea2073a2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -142,9 +142,10 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
   return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
 }
 
-std::optional<VPValue *> vputils::getRecipesForUncountedExit(
-    VPlan &Plan, SmallVectorImpl<VPRecipeBase *> &Recipes,
-    SmallVectorImpl<VPReplicateRecipe *> &GEPs) {
+std::optional<VPValue *>
+vputils::getRecipesForUncountedExit(VPlan &Plan,
+                                    SmallVectorImpl<VPRecipeBase *> &Recipes,
+                                    SmallVectorImpl<VPRecipeBase *> &GEPs) {
   using namespace llvm::VPlanPatternMatch;
   // Given a vplan like the following (just including the recipes contributing
   // to loop control exiting here, not the actual work), we're looking to match
@@ -196,16 +197,22 @@ std::optional<VPValue *> vputils::getRecipesForUncountedExit(
     return std::nullopt;
 
   SmallVector<VPValue *, 4> Worklist;
-  bool LoadFound = false;
+  SmallVector<VPWidenLoadRecipe *, 1> Loads;
   Worklist.push_back(UncountedCondition);
   while (!Worklist.empty()) {
     VPValue *V = Worklist.pop_back_val();
 
+    // Any value defined outside the loop does not need to be copied.
     if (V->isDefinedOutsideLoopRegions())
       continue;
+
+    // FIXME: Remove the single-user restriction; it's here because we're
+    //        starting with the simplest set of loops we can, and multiple
+    //        users would require adding PHI nodes in the transform.
     if (V->getNumUsers() > 1)
       return std::nullopt;
 
+    // Walk back through recipes until we find at least one load from memory.
     if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
       if (Cmp->getOpcode() != Instruction::ICmp)
         return std::nullopt;
@@ -213,31 +220,31 @@ std::optional<VPValue *> vputils::getRecipesForUncountedExit(
       Worklist.push_back(Cmp->getOperand(1));
       Recipes.push_back(Cmp);
     } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
-      if (!Load->isConsecutive() || Load->isMasked())
-        return std::nullopt;
-      Worklist.push_back(Load->getAddr());
-      Recipes.push_back(Load);
-      LoadFound = true;
-    } else if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(V)) {
-      Worklist.push_back(VecPtr->getOperand(0));
-      Recipes.push_back(VecPtr);
-    } else if (auto *GEP = dyn_cast<VPReplicateRecipe>(V)) {
-      if (GEP->getNumOperands() != 2)
+      // Reject masked loads for the time being; they make the exit condition
+      // more complex.
+      if (Load->isMasked())
         return std::nullopt;
-      if (!match(GEP, m_GetElementPtr(
-                          m_LoopInvVPValue(),
-                          m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
-                                          m_SpecificInt(1),
-                                          m_Specific(&Plan.getVF())))))
-        return std::nullopt;
-      GEPs.push_back(GEP);
-      Recipes.push_back(GEP);
+      Loads.push_back(Load);
     } else
       return std::nullopt;
   }
 
-  if (GEPs.empty() || !LoadFound)
-    return std::nullopt;
+  // Check the loads for exact patterns; for now we only support a contiguous
+  // load based directly on the canonical IV with a step of 1.
+  for (VPWidenLoadRecipe *Load : Loads) {
+    Recipes.push_back(Load);
+    VPValue *GEP = Load->getAddr();
+
+    if (!match(GEP, m_GetElementPtr(
+                        m_LoopInvVPValue(),
+                        m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
+                                        m_SpecificInt(1),
+                                        m_Specific(&Plan.getVF())))))
+      return std::nullopt;
+
+    Recipes.push_back(GEP->getDefiningRecipe());
+    GEPs.push_back(GEP->getDefiningRecipe());
+  }
 
   return UncountedCondition;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 503f599586489..c5c18c0a91326 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -102,13 +102,15 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
 /// exist.
 VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
 
-/// Returns the VPValue representing the uncounted exit comparison if all the
-/// recipes needed to form the condition within the vector loop body were
-/// matched.
+/// Returns the VPValue representing the uncounted exit comparison used by
+/// AnyOf if the recipes it depends on can be traced back to live-ins and
+/// the canonical IV and it is deemed safe to copy those recipes into the
+/// vector preheader. The recipes are stored in \p Recipes, and recipes
+/// forming an address for a load are also added to \p GEPs.
 std::optional<VPValue *>
 getRecipesForUncountedExit(VPlan &Plan,
                            SmallVectorImpl<VPRecipeBase *> &Recipes,
-                           SmallVectorImpl<VPReplicateRecipe *> &GEPs);
+                           SmallVectorImpl<VPRecipeBase *> &GEPs);
 } // namespace vputils
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
index 81ef67a0fb923..92bd0433e1233 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
@@ -81,7 +81,7 @@ TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
   Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
 
   SmallVector<VPRecipeBase *, 8> Recipes;
-  SmallVector<VPReplicateRecipe *, 2> GEPs;
+  SmallVector<VPRecipeBase *, 2> GEPs;
 
   std::optional<VPValue *> UncountedCondition =
       vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);

>From 3ad198d5ab65a9678c2088f885e1c7d6731e3777 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 3 Sep 2025 13:35:25 +0000
Subject: [PATCH 3/4] Create test vplan from IR; minor cleanups

---
 .../Transforms/Vectorize/VPlanPatternMatch.h  |  2 +-
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp  | 66 ++++++-------
 llvm/lib/Transforms/Vectorize/VPlanUtils.h    |  8 +-
 .../Transforms/Vectorize/CMakeLists.txt       |  2 +-
 .../Transforms/Vectorize/VPlanTestBase.h      |  4 +-
 .../Vectorize/VPlanUncountableExitTest.cpp    | 65 ++++++++++++
 .../Vectorize/VPlanUncountedExitTest.cpp      | 99 -------------------
 7 files changed, 101 insertions(+), 145 deletions(-)
 create mode 100644 llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
 delete mode 100644 llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 1663aedb62c8e..9e22055d8cdb8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -715,7 +715,7 @@ inline loop_invariant_vpvalue m_LoopInvVPValue() {
 }
 
 template <typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
 m_AnyOf(const Op0_t &Op0) {
   return m_VPInstruction<VPInstruction::AnyOf>(Op0);
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4aee6ea2073a2..325704f7acb08 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -143,17 +143,20 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
 }
 
 std::optional<VPValue *>
-vputils::getRecipesForUncountedExit(VPlan &Plan,
-                                    SmallVectorImpl<VPRecipeBase *> &Recipes,
-                                    SmallVectorImpl<VPRecipeBase *> &GEPs) {
+vputils::getRecipesForUncountableExit(VPlan &Plan,
+                                      SmallVectorImpl<VPRecipeBase *> &Recipes,
+                                      SmallVectorImpl<VPRecipeBase *> &GEPs) {
   using namespace llvm::VPlanPatternMatch;
-  // Given a vplan like the following (just including the recipes contributing
+  // Given a VPlan like the following (just including the recipes contributing
   // to loop control exiting here, not the actual work), we're looking to match
-  // the recipes contributing to the uncounted exit condition comparison
+  // the recipes contributing to the uncountable exit condition comparison
   // (here, vp<%4>) back to the canonical induction for the vector body so that
   // we can copy them to a preheader and rotate the address in the loop to the
   // next vector iteration.
   //
+  // Currently, the load address is restricted to a GEP with two operands:
+  // a loop-invariant base address and one index. This may be relaxed later.
+  //
   // VPlan ' for UF>=1' {
   // Live-in vp<%0> = VF
   // Live-in ir<64> = original trip-count
@@ -187,18 +190,16 @@ vputils::getRecipesForUncountedExit(VPlan &Plan,
   // No successors
   // }
 
-  // Find the uncounted loop exit condition.
+  // Find the uncountable loop exit condition.
   auto *Region = Plan.getVectorLoopRegion();
-  VPValue *UncountedCondition = nullptr;
-  if (!match(
-          Region->getExitingBasicBlock()->getTerminator(),
-          m_BranchOnCond(m_OneUse(m_c_BinaryOr(
-              m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue())))))
+  VPValue *UncountableCondition = nullptr;
+  if (!match(Region->getExitingBasicBlock()->getTerminator(),
+             m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+                 m_AnyOf(m_VPValue(UncountableCondition)), m_VPValue())))))
     return std::nullopt;
 
   SmallVector<VPValue *, 4> Worklist;
-  SmallVector<VPWidenLoadRecipe *, 1> Loads;
-  Worklist.push_back(UncountedCondition);
+  Worklist.push_back(UncountableCondition);
   while (!Worklist.empty()) {
     VPValue *V = Worklist.pop_back_val();
 
@@ -212,39 +213,28 @@ vputils::getRecipesForUncountedExit(VPlan &Plan,
     if (V->getNumUsers() > 1)
       return std::nullopt;
 
+    VPValue *Op1, *Op2;
     // Walk back through recipes until we find at least one load from memory.
-    if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
-      if (Cmp->getOpcode() != Instruction::ICmp)
-        return std::nullopt;
-      Worklist.push_back(Cmp->getOperand(0));
-      Worklist.push_back(Cmp->getOperand(1));
-      Recipes.push_back(Cmp);
+    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
+      Worklist.push_back(Op1);
+      Worklist.push_back(Op2);
+      Recipes.push_back(V->getDefiningRecipe());
     } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
       // Reject masked loads for the time being; they make the exit condition
       // more complex.
       if (Load->isMasked())
         return std::nullopt;
-      Loads.push_back(Load);
-    } else
-      return std::nullopt;
-  }
 
-  // Check the loads for exact patterns; for now we only support a contiguous
-  // load based directly on the canonical IV with a step of 1.
-  for (VPWidenLoadRecipe *Load : Loads) {
-    Recipes.push_back(Load);
-    VPValue *GEP = Load->getAddr();
-
-    if (!match(GEP, m_GetElementPtr(
-                        m_LoopInvVPValue(),
-                        m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
-                                        m_SpecificInt(1),
-                                        m_Specific(&Plan.getVF())))))
-      return std::nullopt;
+      VPValue *GEP = Load->getAddr();
+      if (!match(GEP, m_GetElementPtr(m_LoopInvVPValue(), m_VPValue())))
+        return std::nullopt;
 
-    Recipes.push_back(GEP->getDefiningRecipe());
-    GEPs.push_back(GEP->getDefiningRecipe());
+      Recipes.push_back(Load);
+      Recipes.push_back(GEP->getDefiningRecipe());
+      GEPs.push_back(GEP->getDefiningRecipe());
+    } else
+      return std::nullopt;
   }
 
-  return UncountedCondition;
+  return UncountableCondition;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index c5c18c0a91326..35294c7566457 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -102,15 +102,15 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
 /// exist.
 VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
 
-/// Returns the VPValue representing the uncounted exit comparison used by
+/// Returns the VPValue representing the uncountable exit comparison used by
 /// AnyOf if the recipes it depends on can be traced back to live-ins and
 /// the canonical IV and it is deemed safe to copy those recipes into the
 /// vector preheader. The recipes are stored in \p Recipes, and recipes
 /// forming an address for a load are also added to \p GEPs.
 std::optional<VPValue *>
-getRecipesForUncountedExit(VPlan &Plan,
-                           SmallVectorImpl<VPRecipeBase *> &Recipes,
-                           SmallVectorImpl<VPRecipeBase *> &GEPs);
+getRecipesForUncountableExit(VPlan &Plan,
+                             SmallVectorImpl<VPRecipeBase *> &Recipes,
+                             SmallVectorImpl<VPRecipeBase *> &GEPs);
 } // namespace vputils
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
index a7254922af007..af111a29b90e5 100644
--- a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -14,6 +14,6 @@ add_llvm_unittest(VectorizeTests
   VPlanHCFGTest.cpp
   VPlanPatternMatchTest.cpp
   VPlanSlpTest.cpp
-  VPlanUncountedExitTest.cpp
+  VPlanUncountableExitTest.cpp
   VPlanVerifierTest.cpp
   )
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index 383f79bc87a45..ed6e13b4add3d 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -65,7 +65,7 @@ class VPlanTestIRBase : public testing::Test {
   }
 
   /// Build the VPlan for the loop starting from \p LoopHeader.
-  VPlanPtr buildVPlan(BasicBlock *LoopHeader) {
+  VPlanPtr buildVPlan(BasicBlock *LoopHeader, bool HasUncountableExit = false) {
     Function &F = *LoopHeader->getParent();
     assert(!verifyFunction(F) && "input function must be valid");
     doAnalysis(F);
@@ -75,7 +75,7 @@ class VPlanTestIRBase : public testing::Test {
     auto Plan = VPlanTransforms::buildVPlan0(L, *LI, IntegerType::get(*Ctx, 64),
                                              {}, PSE);
 
-    VPlanTransforms::handleEarlyExits(*Plan, false);
+    VPlanTransforms::handleEarlyExits(*Plan, HasUncountableExit);
     VPlanTransforms::addMiddleCheck(*Plan, true, false);
 
     VPlanTransforms::createLoopRegions(*Plan);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
new file mode 100644
index 0000000000000..dc23f1428fdd9
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
@@ -0,0 +1,65 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp ---===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanUtils.h"
+#include "VPlanTestBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+namespace {
+class VPUncountableExitTest : public VPlanTestIRBase {};
+
+TEST_F(VPUncountableExitTest, FindUncountableExitRecipes) {
+  const char *ModuleString =
+      "define void @f(ptr %array, ptr %pred) {\n"
+      "entry:\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]\n"
+      "  %st.addr = getelementptr inbounds i16, ptr %array, i64 %iv\n"
+      "  %data = load i16, ptr %st.addr, align 2\n"
+      "  %inc = add nsw i16 %data, 1\n"
+      "  store i16 %inc, ptr %st.addr, align 2\n"
+      "  %uncountable.addr = getelementptr inbounds nuw i16, ptr %pred, i64 "
+      "%iv\n"
+      "  %uncountable.val = load i16, ptr %uncountable.addr, align 2\n"
+      "  %uncountable.cond = icmp sgt i16 %uncountable.val, 500\n"
+      "  br i1 %uncountable.cond, label %exit, label %for.inc\n"
+      "for.inc:\n"
+      "  %iv.next = add nuw nsw i64 %iv, 1\n"
+      "  %countable.cond = icmp eq i64 %iv.next, 20\n"
+      " br i1 %countable.cond, label %exit, label %for.body\n"
+      "exit:\n"
+      "  ret void\n"
+      "}\n";
+
+  Module &M = parseModule(ModuleString);
+
+  Function *F = M.getFunction("f");
+  BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
+  auto Plan = buildVPlan(LoopHeader, /*HasUncountableExit=*/true);
+  VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
+      Plan, [](PHINode *P) { return nullptr; }, *TLI);
+  VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+
+  SmallVector<VPRecipeBase *, 8> Recipes;
+  SmallVector<VPRecipeBase *, 2> GEPs;
+
+  std::optional<VPValue *> UncountableCondition =
+      vputils::getRecipesForUncountableExit(*Plan, Recipes, GEPs);
+  ASSERT_TRUE(UncountableCondition.has_value());
+  ASSERT_EQ(GEPs.size(), 1ull);
+  ASSERT_EQ(Recipes.size(), 3ull);
+}
+
+} // namespace
+} // namespace llvm
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
deleted file mode 100644
index 92bd0433e1233..0000000000000
--- a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp -----===//
-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../lib/Transforms/Vectorize/LoopVectorizationPlanner.h"
-#include "../lib/Transforms/Vectorize/VPlan.h"
-#include "../lib/Transforms/Vectorize/VPlanPatternMatch.h"
-#include "../lib/Transforms/Vectorize/VPlanUtils.h"
-#include "VPlanTestBase.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "gtest/gtest.h"
-
-namespace llvm {
-
-namespace {
-class VPUncountedExitTest : public VPlanTestBase {};
-
-TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
-  // Create CFG skeleton.
-  VPlan &Plan = getPlan();
-  VPBasicBlock *ScalarPH = Plan.getEntry();
-  VPBasicBlock *Entry = Plan.createVPBasicBlock("entry");
-  Plan.setEntry(Entry);
-  VPBasicBlock *VectorPH = Plan.createVPBasicBlock("vector.ph");
-  VPBasicBlock *VecBody = Plan.createVPBasicBlock("vector.body");
-  VPRegionBlock *Region =
-      Plan.createVPRegionBlock(VecBody, VecBody, "vector loop");
-  VPBasicBlock *MiddleBlock = Plan.createVPBasicBlock("middle.block");
-  VPBlockUtils::connectBlocks(Entry, ScalarPH);
-  VPBlockUtils::connectBlocks(Entry, VectorPH);
-  VPBlockUtils::connectBlocks(VectorPH, Region);
-  VPBlockUtils::connectBlocks(Region, MiddleBlock);
-  VPBlockUtils::connectBlocks(MiddleBlock, ScalarPH);
-
-  // Live-Ins
-  IntegerType *I64Ty = IntegerType::get(C, 64);
-  IntegerType *I32Ty = IntegerType::get(C, 32);
-  PointerType *PTy = PointerType::get(C, 0);
-  VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
-  VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
-  VPValue *VF = &Plan.getVF();
-  Plan.setTripCount(Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 64)));
-
-  // Populate vector.body with the recipes for exiting.
-  auto *IV = new VPCanonicalIVPHIRecipe(Zero, {});
-  VecBody->appendRecipe(IV);
-  VPBuilder Builder(VecBody, VecBody->getFirstNonPhi());
-  auto *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, IV, Inc,
-                                            VF, DebugLoc());
-
-  // Uncounted Exit; GEP -> Load -> Cmp
-  auto *DummyGEP = GetElementPtrInst::Create(I32Ty, Zero->getUnderlyingValue(),
-                                             {}, Twine("ee.addr"));
-  auto *GEP = new VPReplicateRecipe(DummyGEP, {Zero, Steps}, true, nullptr);
-  Builder.insert(GEP);
-  auto *DummyLoad =
-      new LoadInst(I32Ty, PoisonValue::get(PTy), "ee.load", false, Align(1));
-  VPValue *Load =
-      new VPWidenLoadRecipe(*DummyLoad, GEP, nullptr, true, false, {}, {});
-  Builder.insert(Load->getDefiningRecipe());
-  // Should really splat the zero, but we're not checking types here.
-  VPValue *Cmp = new VPWidenRecipe(Instruction::ICmp, {Load, Zero},
-                                   VPIRFlags(CmpInst::ICMP_EQ), {}, {});
-  Builder.insert(Cmp->getDefiningRecipe());
-  VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, Cmp);
-
-  // Counted Exit; Inc IV -> Cmp
-  VPValue *NextIV = Builder.createNaryOp(Instruction::Add, {IV, VF});
-  VPValue *Counted =
-      Builder.createICmp(CmpInst::ICMP_EQ, NextIV, Plan.getTripCount());
-
-  // Combine, and branch.
-  VPValue *Combined = Builder.createNaryOp(Instruction::Or, {AnyOf, Counted});
-  Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
-
-  SmallVector<VPRecipeBase *, 8> Recipes;
-  SmallVector<VPRecipeBase *, 2> GEPs;
-
-  std::optional<VPValue *> UncountedCondition =
-      vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
-  ASSERT_TRUE(UncountedCondition.has_value());
-  ASSERT_EQ(*UncountedCondition, Cmp);
-  ASSERT_EQ(GEPs.size(), 1ull);
-  ASSERT_EQ(GEPs[0], GEP);
-  ASSERT_EQ(Recipes.size(), 3ull);
-
-  delete DummyLoad;
-  delete DummyGEP;
-}
-
-} // namespace
-} // namespace llvm

>From 2f81520e57d12ed971fe2d7c9798d9ecc8940159 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 16 Sep 2025 13:49:19 +0000
Subject: [PATCH 4/4] Switch to m_LiveIn, update comment

---
 .../Transforms/Vectorize/VPlanPatternMatch.h  | 20 +++++++++----------
 llvm/lib/Transforms/Vectorize/VPlanUtils.cpp  |  2 +-
 llvm/lib/Transforms/Vectorize/VPlanUtils.h    |  7 ++++---
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 9e22055d8cdb8..99336108faf77 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -345,6 +345,12 @@ m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) {
   return m_VPInstruction<VPInstruction::BranchOnCount>(Op0, Op1);
 }
 
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
+m_AnyOf(const Op0_t &Op0) {
+  return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
 template <unsigned Opcode, typename Op0_t>
 inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
   return AllRecipe_match<Opcode, Op0_t>(Op0);
@@ -703,22 +709,14 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
   return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
 }
 
-struct loop_invariant_vpvalue {
+struct live_in_vpvalue {
   template <typename ITy> bool match(ITy *V) const {
     VPValue *Val = dyn_cast<VPValue>(V);
-    return Val && Val->isDefinedOutsideLoopRegions();
+    return Val && Val->isLiveIn();
   }
 };
 
-inline loop_invariant_vpvalue m_LoopInvVPValue() {
-  return loop_invariant_vpvalue();
-}
-
-template <typename Op0_t>
-inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
-m_AnyOf(const Op0_t &Op0) {
-  return m_VPInstruction<VPInstruction::AnyOf>(Op0);
-}
+inline live_in_vpvalue m_LiveIn() { return live_in_vpvalue(); }
 
 template <typename SubPattern_t> struct OneUse_match {
   SubPattern_t SubPattern;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 325704f7acb08..83e1109abfcdc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -226,7 +226,7 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
         return std::nullopt;
 
       VPValue *GEP = Load->getAddr();
-      if (!match(GEP, m_GetElementPtr(m_LoopInvVPValue(), m_VPValue())))
+      if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
         return std::nullopt;
 
       Recipes.push_back(Load);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 35294c7566457..33dd8efaec2db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -104,9 +104,10 @@ VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
 
 /// Returns the VPValue representing the uncountable exit comparison used by
 /// AnyOf if the recipes it depends on can be traced back to live-ins and
-/// the canonical IV and it is deemed safe to copy those recipes into the
-/// vector preheader. The recipes are stored in \p Recipes, and recipes
-/// forming an address for a load are also added to \p GEPs.
+/// the addresses (in GEP/PtrAdd form) of any (non-masked) load used in
+/// generating the values for the comparison. The recipes are stored in
+/// \p Recipes, and recipes forming an address for a load are also added to
+/// \p GEPs.
 std::optional<VPValue *>
 getRecipesForUncountableExit(VPlan &Plan,
                              SmallVectorImpl<VPRecipeBase *> &Recipes,



More information about the llvm-commits mailing list