[llvm] [LV] Provide utility routine to find uncounted exit recipes (PR #152530)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 16 07:16:39 PDT 2025
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/152530
>From 7d755295dd33c6ac01675bb4f7e2dae6ea3c80d0 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 30 Jul 2025 09:23:13 +0000
Subject: [PATCH 1/4] WIP
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 31 ++++++
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 100 ++++++++++++++++++
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 8 ++
llvm/lib/Transforms/Vectorize/VPlanValue.h | 2 +
.../Transforms/Vectorize/CMakeLists.txt | 1 +
.../Vectorize/VPlanUncountedExitTest.cpp | 99 +++++++++++++++++
6 files changed, 241 insertions(+)
create mode 100644 llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 401a2cbd9a5ca..1663aedb62c8e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -703,6 +703,37 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}
+struct loop_invariant_vpvalue {
+ template <typename ITy> bool match(ITy *V) const {
+ VPValue *Val = dyn_cast<VPValue>(V);
+ return Val && Val->isDefinedOutsideLoopRegions();
+ }
+};
+
+inline loop_invariant_vpvalue m_LoopInvVPValue() {
+ return loop_invariant_vpvalue();
+}
+
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+m_AnyOf(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
+template <typename SubPattern_t> struct OneUse_match {
+ SubPattern_t SubPattern;
+
+ OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {} // Keeps its own copy of the wrapped sub-pattern.
+
+ template <typename OpTy> bool match(OpTy *V) { // Succeeds only if V->hasOneUse() and the sub-pattern both hold.
+ return V->hasOneUse() && SubPattern.match(V); // Short-circuits: SubPattern is tried only after the use check passes.
+ }
+};
+
+template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) { // Wraps SubPattern so it additionally requires a single use.
+ return SubPattern; // Converted to OneUse_match<T> through its one-argument constructor.
+}
+
} // namespace VPlanPatternMatch
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index ddc4ad1977401..b322b5b44b230 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -141,3 +141,103 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
});
return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}
+
+std::optional<VPValue *> vputils::getRecipesForUncountedExit(
+ VPlan &Plan, SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPReplicateRecipe *> &GEPs) {
+ using namespace llvm::VPlanPatternMatch;
+ // Given a vplan like the following (just including the recipes contributing
+ // to loop control exiting here, not the actual work), we're looking to match
+ // the recipes contributing to the uncounted exit condition comparison
+ // (here, vp<%4>) back to the canonical induction for the vector body so that
+ // we can copy them to a preheader and rotate the address in the loop to the
+ // next vector iteration.
+ //
+ // VPlan ' for UF>=1' {
+ // Live-in vp<%0> = VF
+ // Live-in ir<64> = original trip-count
+ //
+ // entry:
+ // Successor(s): preheader, vector.ph
+ //
+ // vector.ph:
+ // Successor(s): vector loop
+ //
+ // <x1> vector loop: {
+ // vector.body:
+ // EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
+ // vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
+ // CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
+ // WIDEN ir<%ee.load> = load ir<%ee.addr>
+ // WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
+ // EMIT vp<%5> = any-of vp<%4>
+ // EMIT vp<%6> = add vp<%2>, vp<%0>
+ // EMIT vp<%7> = icmp eq vp<%6>, ir<64>
+ // EMIT vp<%8> = or vp<%5>, vp<%7>
+ // EMIT branch-on-cond vp<%8>
+ // No successors
+ // }
+ // Successor(s): middle.block
+ //
+ // middle.block:
+ // Successor(s): preheader
+ //
+ // preheader:
+ // No successors
+ // }
+
+ // Find the uncounted loop exit condition.
+ auto *Region = Plan.getVectorLoopRegion();
+ VPValue *UncountedCondition = nullptr;
+ if (!match(
+ Region->getExitingBasicBlock()->getTerminator(),
+ m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+ m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue())))))
+ return std::nullopt;
+
+ SmallVector<VPValue *, 4> Worklist;
+ bool LoadFound = false;
+ Worklist.push_back(UncountedCondition);
+ while (!Worklist.empty()) {
+ VPValue *V = Worklist.pop_back_val();
+
+ if (V->isDefinedOutsideLoopRegions())
+ continue;
+ if (V->getNumUsers() > 1)
+ return std::nullopt;
+
+ if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
+ if (Cmp->getOpcode() != Instruction::ICmp)
+ return std::nullopt;
+ Worklist.push_back(Cmp->getOperand(0));
+ Worklist.push_back(Cmp->getOperand(1));
+ Recipes.push_back(Cmp);
+ } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
+ if (!Load->isConsecutive() || Load->isMasked())
+ return std::nullopt;
+ Worklist.push_back(Load->getAddr());
+ Recipes.push_back(Load);
+ LoadFound = true;
+ } else if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(V)) {
+ Worklist.push_back(VecPtr->getOperand(0));
+ Recipes.push_back(VecPtr);
+ } else if (auto *GEP = dyn_cast<VPReplicateRecipe>(V)) {
+ if (GEP->getNumOperands() != 2)
+ return std::nullopt;
+ if (!match(GEP, m_GetElementPtr(
+ m_LoopInvVPValue(),
+ m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
+ m_SpecificInt(1),
+ m_Specific(&Plan.getVF())))))
+ return std::nullopt;
+ GEPs.push_back(GEP);
+ Recipes.push_back(GEP);
+ } else
+ return std::nullopt;
+ }
+
+ if (GEPs.empty() || !LoadFound)
+ return std::nullopt;
+
+ return UncountedCondition;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 77c099b271717..503f599586489 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -101,6 +101,14 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
/// Returns the header block of the first, top-level loop, or null if none
/// exist.
VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
+
+/// Returns the VPValue representing the uncounted exit comparison if all the
+/// recipes needed to form the condition within the vector loop body were
+/// matched.
+std::optional<VPValue *>
+getRecipesForUncountedExit(VPlan &Plan,
+ SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPReplicateRecipe *> &GEPs);
} // namespace vputils
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 85c6c2c8d7965..0678bc90ef4b5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -148,6 +148,8 @@ class LLVM_ABI_FOR_TEST VPValue {
return Current != user_end();
}
+ bool hasOneUse() const { return getNumUsers() == 1; } // True iff getNumUsers() is exactly 1 (false for zero or several).
+
void replaceAllUsesWith(VPValue *New);
/// Go through the uses list for this VPValue and make each use point to \p
diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
index 53eeff28c185f..a7254922af007 100644
--- a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -14,5 +14,6 @@ add_llvm_unittest(VectorizeTests
VPlanHCFGTest.cpp
VPlanPatternMatchTest.cpp
VPlanSlpTest.cpp
+ VPlanUncountedExitTest.cpp
VPlanVerifierTest.cpp
)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
new file mode 100644
index 0000000000000..81ef67a0fb923
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
@@ -0,0 +1,99 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp -----===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/LoopVectorizationPlanner.h"
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanPatternMatch.h"
+#include "../lib/Transforms/Vectorize/VPlanUtils.h"
+#include "VPlanTestBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+namespace {
+class VPUncountedExitTest : public VPlanTestBase {};
+
+TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
+ // Create CFG skeleton.
+ VPlan &Plan = getPlan();
+ VPBasicBlock *ScalarPH = Plan.getEntry();
+ VPBasicBlock *Entry = Plan.createVPBasicBlock("entry");
+ Plan.setEntry(Entry);
+ VPBasicBlock *VectorPH = Plan.createVPBasicBlock("vector.ph");
+ VPBasicBlock *VecBody = Plan.createVPBasicBlock("vector.body");
+ VPRegionBlock *Region =
+ Plan.createVPRegionBlock(VecBody, VecBody, "vector loop");
+ VPBasicBlock *MiddleBlock = Plan.createVPBasicBlock("middle.block");
+ VPBlockUtils::connectBlocks(Entry, ScalarPH);
+ VPBlockUtils::connectBlocks(Entry, VectorPH);
+ VPBlockUtils::connectBlocks(VectorPH, Region);
+ VPBlockUtils::connectBlocks(Region, MiddleBlock);
+ VPBlockUtils::connectBlocks(MiddleBlock, ScalarPH);
+
+ // Live-Ins
+ IntegerType *I64Ty = IntegerType::get(C, 64);
+ IntegerType *I32Ty = IntegerType::get(C, 32);
+ PointerType *PTy = PointerType::get(C, 0);
+ VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
+ VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
+ VPValue *VF = &Plan.getVF();
+ Plan.setTripCount(Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 64)));
+
+ // Populate vector.body with the recipes for exiting.
+ auto *IV = new VPCanonicalIVPHIRecipe(Zero, {});
+ VecBody->appendRecipe(IV);
+ VPBuilder Builder(VecBody, VecBody->getFirstNonPhi());
+ auto *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, IV, Inc,
+ VF, DebugLoc());
+
+ // Uncounted Exit; GEP -> Load -> Cmp
+ auto *DummyGEP = GetElementPtrInst::Create(I32Ty, Zero->getUnderlyingValue(),
+ {}, Twine("ee.addr"));
+ auto *GEP = new VPReplicateRecipe(DummyGEP, {Zero, Steps}, true, nullptr);
+ Builder.insert(GEP);
+ auto *DummyLoad =
+ new LoadInst(I32Ty, PoisonValue::get(PTy), "ee.load", false, Align(1));
+ VPValue *Load =
+ new VPWidenLoadRecipe(*DummyLoad, GEP, nullptr, true, false, {}, {});
+ Builder.insert(Load->getDefiningRecipe());
+ // Should really splat the zero, but we're not checking types here.
+ VPValue *Cmp = new VPWidenRecipe(Instruction::ICmp, {Load, Zero},
+ VPIRFlags(CmpInst::ICMP_EQ), {}, {});
+ Builder.insert(Cmp->getDefiningRecipe());
+ VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, Cmp);
+
+ // Counted Exit; Inc IV -> Cmp
+ VPValue *NextIV = Builder.createNaryOp(Instruction::Add, {IV, VF});
+ VPValue *Counted =
+ Builder.createICmp(CmpInst::ICMP_EQ, NextIV, Plan.getTripCount());
+
+ // Combine, and branch.
+ VPValue *Combined = Builder.createNaryOp(Instruction::Or, {AnyOf, Counted});
+ Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
+
+ SmallVector<VPRecipeBase *, 8> Recipes;
+ SmallVector<VPReplicateRecipe *, 2> GEPs;
+
+ std::optional<VPValue *> UncountedCondition =
+ vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
+ ASSERT_TRUE(UncountedCondition.has_value());
+ ASSERT_EQ(*UncountedCondition, Cmp);
+ ASSERT_EQ(GEPs.size(), 1ull);
+ ASSERT_EQ(GEPs[0], GEP);
+ ASSERT_EQ(Recipes.size(), 3ull);
+
+ delete DummyLoad;
+ delete DummyGEP;
+}
+
+} // namespace
+} // namespace llvm
>From 688eadb9c8db9bf9bc7d11f729653761f9179af3 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 12 Aug 2025 10:39:05 +0000
Subject: [PATCH 2/4] Improve comments, be stricter about matching inputs to
loads
---
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 55 +++++++++++--------
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 10 ++--
.../Vectorize/VPlanUncountedExitTest.cpp | 2 +-
3 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index b322b5b44b230..4aee6ea2073a2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -142,9 +142,10 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I);
}
-std::optional<VPValue *> vputils::getRecipesForUncountedExit(
- VPlan &Plan, SmallVectorImpl<VPRecipeBase *> &Recipes,
- SmallVectorImpl<VPReplicateRecipe *> &GEPs) {
+std::optional<VPValue *>
+vputils::getRecipesForUncountedExit(VPlan &Plan,
+ SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPRecipeBase *> &GEPs) {
using namespace llvm::VPlanPatternMatch;
// Given a vplan like the following (just including the recipes contributing
// to loop control exiting here, not the actual work), we're looking to match
@@ -196,16 +197,22 @@ std::optional<VPValue *> vputils::getRecipesForUncountedExit(
return std::nullopt;
SmallVector<VPValue *, 4> Worklist;
- bool LoadFound = false;
+ SmallVector<VPWidenLoadRecipe *, 1> Loads;
Worklist.push_back(UncountedCondition);
while (!Worklist.empty()) {
VPValue *V = Worklist.pop_back_val();
+ // Any value defined outside the loop does not need to be copied.
if (V->isDefinedOutsideLoopRegions())
continue;
+
+ // FIXME: Remove the single user restriction; it's here because we're
+ // starting with the simplest set of loops we can, and multiple
+ // users means needing to add PHI nodes in the transform.
if (V->getNumUsers() > 1)
return std::nullopt;
+ // Walk back through recipes until we find at least one load from memory.
if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
if (Cmp->getOpcode() != Instruction::ICmp)
return std::nullopt;
@@ -213,31 +220,31 @@ std::optional<VPValue *> vputils::getRecipesForUncountedExit(
Worklist.push_back(Cmp->getOperand(1));
Recipes.push_back(Cmp);
} else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
- if (!Load->isConsecutive() || Load->isMasked())
- return std::nullopt;
- Worklist.push_back(Load->getAddr());
- Recipes.push_back(Load);
- LoadFound = true;
- } else if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(V)) {
- Worklist.push_back(VecPtr->getOperand(0));
- Recipes.push_back(VecPtr);
- } else if (auto *GEP = dyn_cast<VPReplicateRecipe>(V)) {
- if (GEP->getNumOperands() != 2)
+ // Reject masked loads for the time being; they make the exit condition
+ // more complex.
+ if (Load->isMasked())
return std::nullopt;
- if (!match(GEP, m_GetElementPtr(
- m_LoopInvVPValue(),
- m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
- m_SpecificInt(1),
- m_Specific(&Plan.getVF())))))
- return std::nullopt;
- GEPs.push_back(GEP);
- Recipes.push_back(GEP);
+ Loads.push_back(Load);
} else
return std::nullopt;
}
- if (GEPs.empty() || !LoadFound)
- return std::nullopt;
+ // Check the loads for exact patterns; for now we only support a contiguous
+ // load based directly on the canonical IV with a step of 1.
+ for (VPWidenLoadRecipe *Load : Loads) {
+ Recipes.push_back(Load);
+ VPValue *GEP = Load->getAddr();
+
+ if (!match(GEP, m_GetElementPtr(
+ m_LoopInvVPValue(),
+ m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
+ m_SpecificInt(1),
+ m_Specific(&Plan.getVF())))))
+ return std::nullopt;
+
+ Recipes.push_back(GEP->getDefiningRecipe());
+ GEPs.push_back(GEP->getDefiningRecipe());
+ }
return UncountedCondition;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 503f599586489..c5c18c0a91326 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -102,13 +102,15 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
/// exist.
VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
-/// Returns the VPValue representing the uncounted exit comparison if all the
-/// recipes needed to form the condition within the vector loop body were
-/// matched.
+/// Returns the VPValue representing the uncounted exit comparison used by
+/// AnyOf if the recipes it depends on can be traced back to live-ins and
+/// the canonical IV and it is deemed safe to copy those recipes into the
+/// vector preheader. The recipes are stored in \p Recipes, and recipes
+/// forming an address for a load are also added to \p GEPs.
std::optional<VPValue *>
getRecipesForUncountedExit(VPlan &Plan,
SmallVectorImpl<VPRecipeBase *> &Recipes,
- SmallVectorImpl<VPReplicateRecipe *> &GEPs);
+ SmallVectorImpl<VPRecipeBase *> &GEPs);
} // namespace vputils
//===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
index 81ef67a0fb923..92bd0433e1233 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
@@ -81,7 +81,7 @@ TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
SmallVector<VPRecipeBase *, 8> Recipes;
- SmallVector<VPReplicateRecipe *, 2> GEPs;
+ SmallVector<VPRecipeBase *, 2> GEPs;
std::optional<VPValue *> UncountedCondition =
vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
>From 3ad198d5ab65a9678c2088f885e1c7d6731e3777 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 3 Sep 2025 13:35:25 +0000
Subject: [PATCH 3/4] Create test vplan from IR; minor cleanups
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 2 +-
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 66 ++++++-------
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 8 +-
.../Transforms/Vectorize/CMakeLists.txt | 2 +-
.../Transforms/Vectorize/VPlanTestBase.h | 4 +-
.../Vectorize/VPlanUncountableExitTest.cpp | 65 ++++++++++++
.../Vectorize/VPlanUncountedExitTest.cpp | 99 -------------------
7 files changed, 101 insertions(+), 145 deletions(-)
create mode 100644 llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
delete mode 100644 llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 1663aedb62c8e..9e22055d8cdb8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -715,7 +715,7 @@ inline loop_invariant_vpvalue m_LoopInvVPValue() {
}
template <typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, VPInstruction::AnyOf>
+inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
m_AnyOf(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::AnyOf>(Op0);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4aee6ea2073a2..325704f7acb08 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -143,17 +143,20 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
}
std::optional<VPValue *>
-vputils::getRecipesForUncountedExit(VPlan &Plan,
- SmallVectorImpl<VPRecipeBase *> &Recipes,
- SmallVectorImpl<VPRecipeBase *> &GEPs) {
+vputils::getRecipesForUncountableExit(VPlan &Plan,
+ SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPRecipeBase *> &GEPs) {
using namespace llvm::VPlanPatternMatch;
- // Given a vplan like the following (just including the recipes contributing
+ // Given a VPlan like the following (just including the recipes contributing
// to loop control exiting here, not the actual work), we're looking to match
- // the recipes contributing to the uncounted exit condition comparison
+ // the recipes contributing to the uncountable exit condition comparison
// (here, vp<%4>) back to the canonical induction for the vector body so that
// we can copy them to a preheader and rotate the address in the loop to the
// next vector iteration.
//
+ // Currently, the address of the load is restricted to a GEP with 2 terms and
+ // a loop invariant base address. This constraint may be relaxed later.
+ //
// VPlan ' for UF>=1' {
// Live-in vp<%0> = VF
// Live-in ir<64> = original trip-count
@@ -187,18 +190,16 @@ vputils::getRecipesForUncountedExit(VPlan &Plan,
// No successors
// }
- // Find the uncounted loop exit condition.
+ // Find the uncountable loop exit condition.
auto *Region = Plan.getVectorLoopRegion();
- VPValue *UncountedCondition = nullptr;
- if (!match(
- Region->getExitingBasicBlock()->getTerminator(),
- m_BranchOnCond(m_OneUse(m_c_BinaryOr(
- m_OneUse(m_AnyOf(m_VPValue(UncountedCondition))), m_VPValue())))))
+ VPValue *UncountableCondition = nullptr;
+ if (!match(Region->getExitingBasicBlock()->getTerminator(),
+ m_BranchOnCond(m_OneUse(m_c_BinaryOr(
+ m_AnyOf(m_VPValue(UncountableCondition)), m_VPValue())))))
return std::nullopt;
SmallVector<VPValue *, 4> Worklist;
- SmallVector<VPWidenLoadRecipe *, 1> Loads;
- Worklist.push_back(UncountedCondition);
+ Worklist.push_back(UncountableCondition);
while (!Worklist.empty()) {
VPValue *V = Worklist.pop_back_val();
@@ -212,39 +213,28 @@ vputils::getRecipesForUncountedExit(VPlan &Plan,
if (V->getNumUsers() > 1)
return std::nullopt;
+ VPValue *Op1, *Op2;
// Walk back through recipes until we find at least one load from memory.
- if (auto *Cmp = dyn_cast<VPWidenRecipe>(V)) {
- if (Cmp->getOpcode() != Instruction::ICmp)
- return std::nullopt;
- Worklist.push_back(Cmp->getOperand(0));
- Worklist.push_back(Cmp->getOperand(1));
- Recipes.push_back(Cmp);
+ if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
+ Worklist.push_back(Op1);
+ Worklist.push_back(Op2);
+ Recipes.push_back(V->getDefiningRecipe());
} else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
// Reject masked loads for the time being; they make the exit condition
// more complex.
if (Load->isMasked())
return std::nullopt;
- Loads.push_back(Load);
- } else
- return std::nullopt;
- }
- // Check the loads for exact patterns; for now we only support a contiguous
- // load based directly on the canonical IV with a step of 1.
- for (VPWidenLoadRecipe *Load : Loads) {
- Recipes.push_back(Load);
- VPValue *GEP = Load->getAddr();
-
- if (!match(GEP, m_GetElementPtr(
- m_LoopInvVPValue(),
- m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()),
- m_SpecificInt(1),
- m_Specific(&Plan.getVF())))))
- return std::nullopt;
+ VPValue *GEP = Load->getAddr();
+ if (!match(GEP, m_GetElementPtr(m_LoopInvVPValue(), m_VPValue())))
+ return std::nullopt;
- Recipes.push_back(GEP->getDefiningRecipe());
- GEPs.push_back(GEP->getDefiningRecipe());
+ Recipes.push_back(Load);
+ Recipes.push_back(GEP->getDefiningRecipe());
+ GEPs.push_back(GEP->getDefiningRecipe());
+ } else
+ return std::nullopt;
}
- return UncountedCondition;
+ return UncountableCondition;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index c5c18c0a91326..35294c7566457 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -102,15 +102,15 @@ bool isUniformAcrossVFsAndUFs(VPValue *V);
/// exist.
VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
-/// Returns the VPValue representing the uncounted exit comparison used by
+/// Returns the VPValue representing the uncountable exit comparison used by
/// AnyOf if the recipes it depends on can be traced back to live-ins and
/// the canonical IV and it is deemed safe to copy those recipes into the
/// vector preheader. The recipes are stored in \p Recipes, and recipes
/// forming an address for a load are also added to \p GEPs.
std::optional<VPValue *>
-getRecipesForUncountedExit(VPlan &Plan,
- SmallVectorImpl<VPRecipeBase *> &Recipes,
- SmallVectorImpl<VPRecipeBase *> &GEPs);
+getRecipesForUncountableExit(VPlan &Plan,
+ SmallVectorImpl<VPRecipeBase *> &Recipes,
+ SmallVectorImpl<VPRecipeBase *> &GEPs);
} // namespace vputils
//===----------------------------------------------------------------------===//
diff --git a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
index a7254922af007..af111a29b90e5 100644
--- a/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/CMakeLists.txt
@@ -14,6 +14,6 @@ add_llvm_unittest(VectorizeTests
VPlanHCFGTest.cpp
VPlanPatternMatchTest.cpp
VPlanSlpTest.cpp
- VPlanUncountedExitTest.cpp
+ VPlanUncountableExitTest.cpp
VPlanVerifierTest.cpp
)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index 383f79bc87a45..ed6e13b4add3d 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -65,7 +65,7 @@ class VPlanTestIRBase : public testing::Test {
}
/// Build the VPlan for the loop starting from \p LoopHeader.
- VPlanPtr buildVPlan(BasicBlock *LoopHeader) {
+ VPlanPtr buildVPlan(BasicBlock *LoopHeader, bool HasUncountableExit = false) {
Function &F = *LoopHeader->getParent();
assert(!verifyFunction(F) && "input function must be valid");
doAnalysis(F);
@@ -75,7 +75,7 @@ class VPlanTestIRBase : public testing::Test {
auto Plan = VPlanTransforms::buildVPlan0(L, *LI, IntegerType::get(*Ctx, 64),
{}, PSE);
- VPlanTransforms::handleEarlyExits(*Plan, false);
+ VPlanTransforms::handleEarlyExits(*Plan, HasUncountableExit);
VPlanTransforms::addMiddleCheck(*Plan, true, false);
VPlanTransforms::createLoopRegions(*Plan);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
new file mode 100644
index 0000000000000..dc23f1428fdd9
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp
@@ -0,0 +1,65 @@
+//===- llvm/unittests/Transforms/Vectorize/VPlanUncountableExitTest.cpp ---===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanUtils.h"
+#include "VPlanTestBase.h"
+#include "llvm/ADT/SmallVector.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+namespace {
+class VPUncountableExitTest : public VPlanTestIRBase {}; // Fixture that adds nothing on top of VPlanTestIRBase.
+
+TEST_F(VPUncountableExitTest, FindUncountableExitRecipes) { // Builds a VPlan from IR and checks what getRecipesForUncountableExit collects.
+ const char *ModuleString = // Loop with two exits: a load-based compare in for.body and an IV compare in for.inc.
+ "define void @f(ptr %array, ptr %pred) {\n"
+ "entry:\n"
+ " br label %for.body\n"
+ "for.body:\n"
+ " %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]\n"
+ " %st.addr = getelementptr inbounds i16, ptr %array, i64 %iv\n"
+ " %data = load i16, ptr %st.addr, align 2\n"
+ " %inc = add nsw i16 %data, 1\n"
+ " store i16 %inc, ptr %st.addr, align 2\n"
+ " %uncountable.addr = getelementptr inbounds nuw i16, ptr %pred, i64 "
+ "%iv\n"
+ " %uncountable.val = load i16, ptr %uncountable.addr, align 2\n"
+ " %uncountable.cond = icmp sgt i16 %uncountable.val, 500\n"
+ " br i1 %uncountable.cond, label %exit, label %for.inc\n"
+ "for.inc:\n"
+ " %iv.next = add nuw nsw i64 %iv, 1\n"
+ " %countable.cond = icmp eq i64 %iv.next, 20\n"
+ " br i1 %countable.cond, label %exit, label %for.body\n"
+ "exit:\n"
+ " ret void\n"
+ "}\n";
+
+ Module &M = parseModule(ModuleString);
+
+ Function *F = M.getFunction("f");
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor(); // entry branches unconditionally to for.body.
+ auto Plan = buildVPlan(LoopHeader, /*HasUncountableExit=*/true); // The flag is passed on to handleEarlyExits by buildVPlan.
+ VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
+ Plan, [](PHINode *P) { return nullptr; }, *TLI); // The callback returns nullptr for every PHI.
+ VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan);
+
+ SmallVector<VPRecipeBase *, 8> Recipes; // Out-parameter: every recipe the helper records.
+ SmallVector<VPRecipeBase *, 2> GEPs; // Out-parameter: the address recipes of matched loads.
+
+ std::optional<VPValue *> UncountableCondition =
+ vputils::getRecipesForUncountableExit(*Plan, Recipes, GEPs);
+ ASSERT_TRUE(UncountableCondition.has_value()); // The helper must not bail out with std::nullopt.
+ ASSERT_EQ(GEPs.size(), 1ull); // One load address is expected (the %pred GEP, presumably).
+ ASSERT_EQ(Recipes.size(), 3ull); // Expected: the compare, the load and the load's GEP.
+}
+
+} // namespace
+} // namespace llvm
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
deleted file mode 100644
index 92bd0433e1233..0000000000000
--- a/llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- llvm/unittests/Transforms/Vectorize/VPlanUncountedExitTest.cpp -----===//
-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "../lib/Transforms/Vectorize/LoopVectorizationPlanner.h"
-#include "../lib/Transforms/Vectorize/VPlan.h"
-#include "../lib/Transforms/Vectorize/VPlanPatternMatch.h"
-#include "../lib/Transforms/Vectorize/VPlanUtils.h"
-#include "VPlanTestBase.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "gtest/gtest.h"
-
-namespace llvm {
-
-namespace {
-class VPUncountedExitTest : public VPlanTestBase {};
-
-TEST_F(VPUncountedExitTest, FindUncountedExitRecipes) {
- // Create CFG skeleton.
- VPlan &Plan = getPlan();
- VPBasicBlock *ScalarPH = Plan.getEntry();
- VPBasicBlock *Entry = Plan.createVPBasicBlock("entry");
- Plan.setEntry(Entry);
- VPBasicBlock *VectorPH = Plan.createVPBasicBlock("vector.ph");
- VPBasicBlock *VecBody = Plan.createVPBasicBlock("vector.body");
- VPRegionBlock *Region =
- Plan.createVPRegionBlock(VecBody, VecBody, "vector loop");
- VPBasicBlock *MiddleBlock = Plan.createVPBasicBlock("middle.block");
- VPBlockUtils::connectBlocks(Entry, ScalarPH);
- VPBlockUtils::connectBlocks(Entry, VectorPH);
- VPBlockUtils::connectBlocks(VectorPH, Region);
- VPBlockUtils::connectBlocks(Region, MiddleBlock);
- VPBlockUtils::connectBlocks(MiddleBlock, ScalarPH);
-
- // Live-Ins
- IntegerType *I64Ty = IntegerType::get(C, 64);
- IntegerType *I32Ty = IntegerType::get(C, 32);
- PointerType *PTy = PointerType::get(C, 0);
- VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 0));
- VPValue *Inc = Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 1));
- VPValue *VF = &Plan.getVF();
- Plan.setTripCount(Plan.getOrAddLiveIn(ConstantInt::get(I64Ty, 64)));
-
- // Populate vector.body with the recipes for exiting.
- auto *IV = new VPCanonicalIVPHIRecipe(Zero, {});
- VecBody->appendRecipe(IV);
- VPBuilder Builder(VecBody, VecBody->getFirstNonPhi());
- auto *Steps = Builder.createScalarIVSteps(Instruction::Add, nullptr, IV, Inc,
- VF, DebugLoc());
-
- // Uncounted Exit; GEP -> Load -> Cmp
- auto *DummyGEP = GetElementPtrInst::Create(I32Ty, Zero->getUnderlyingValue(),
- {}, Twine("ee.addr"));
- auto *GEP = new VPReplicateRecipe(DummyGEP, {Zero, Steps}, true, nullptr);
- Builder.insert(GEP);
- auto *DummyLoad =
- new LoadInst(I32Ty, PoisonValue::get(PTy), "ee.load", false, Align(1));
- VPValue *Load =
- new VPWidenLoadRecipe(*DummyLoad, GEP, nullptr, true, false, {}, {});
- Builder.insert(Load->getDefiningRecipe());
- // Should really splat the zero, but we're not checking types here.
- VPValue *Cmp = new VPWidenRecipe(Instruction::ICmp, {Load, Zero},
- VPIRFlags(CmpInst::ICMP_EQ), {}, {});
- Builder.insert(Cmp->getDefiningRecipe());
- VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, Cmp);
-
- // Counted Exit; Inc IV -> Cmp
- VPValue *NextIV = Builder.createNaryOp(Instruction::Add, {IV, VF});
- VPValue *Counted =
- Builder.createICmp(CmpInst::ICMP_EQ, NextIV, Plan.getTripCount());
-
- // Combine, and branch.
- VPValue *Combined = Builder.createNaryOp(Instruction::Or, {AnyOf, Counted});
- Builder.createNaryOp(VPInstruction::BranchOnCond, {Combined});
-
- SmallVector<VPRecipeBase *, 8> Recipes;
- SmallVector<VPRecipeBase *, 2> GEPs;
-
- std::optional<VPValue *> UncountedCondition =
- vputils::getRecipesForUncountedExit(Plan, Recipes, GEPs);
- ASSERT_TRUE(UncountedCondition.has_value());
- ASSERT_EQ(*UncountedCondition, Cmp);
- ASSERT_EQ(GEPs.size(), 1ull);
- ASSERT_EQ(GEPs[0], GEP);
- ASSERT_EQ(Recipes.size(), 3ull);
-
- delete DummyLoad;
- delete DummyGEP;
-}
-
-} // namespace
-} // namespace llvm
>From 2f81520e57d12ed971fe2d7c9798d9ecc8940159 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Tue, 16 Sep 2025 13:49:19 +0000
Subject: [PATCH 4/4] Switch to m_LiveIn, update comment
---
.../Transforms/Vectorize/VPlanPatternMatch.h | 20 +++++++++----------
llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlanUtils.h | 7 ++++---
3 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 9e22055d8cdb8..99336108faf77 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -345,6 +345,12 @@ m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<VPInstruction::BranchOnCount>(Op0, Op1);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
+m_AnyOf(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::AnyOf>(Op0);
+}
+
template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
@@ -703,22 +709,14 @@ m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
}
-struct loop_invariant_vpvalue {
+struct live_in_vpvalue {
template <typename ITy> bool match(ITy *V) const {
VPValue *Val = dyn_cast<VPValue>(V);
- return Val && Val->isDefinedOutsideLoopRegions();
+ return Val && Val->isLiveIn();
}
};
-inline loop_invariant_vpvalue m_LoopInvVPValue() {
- return loop_invariant_vpvalue();
-}
-
-template <typename Op0_t>
-inline VPInstruction_match<VPInstruction::AnyOf, Op0_t>
-m_AnyOf(const Op0_t &Op0) {
- return m_VPInstruction<VPInstruction::AnyOf>(Op0);
-}
+inline live_in_vpvalue m_LiveIn() { return live_in_vpvalue(); }
template <typename SubPattern_t> struct OneUse_match {
SubPattern_t SubPattern;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 325704f7acb08..83e1109abfcdc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -226,7 +226,7 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
return std::nullopt;
VPValue *GEP = Load->getAddr();
- if (!match(GEP, m_GetElementPtr(m_LoopInvVPValue(), m_VPValue())))
+ if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
return std::nullopt;
Recipes.push_back(Load);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 35294c7566457..33dd8efaec2db 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -104,9 +104,10 @@ VPBasicBlock *getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT);
/// Returns the VPValue representing the uncountable exit comparison used by
/// AnyOf if the recipes it depends on can be traced back to live-ins and
-/// the canonical IV and it is deemed safe to copy those recipes into the
-/// vector preheader. The recipes are stored in \p Recipes, and recipes
-/// forming an address for a load are also added to \p GEPs.
+/// the addresses (in GEP/PtrAdd form) of any (non-masked) load used in
+/// generating the values for the comparison. The recipes are stored in
+/// \p Recipes, and recipes forming an address for a load are also added to
+/// \p GEPs.
std::optional<VPValue *>
getRecipesForUncountableExit(VPlan &Plan,
SmallVectorImpl<VPRecipeBase *> &Recipes,
More information about the llvm-commits mailing list