[llvm-branch-commits] [llvm] [LV] Vectorize uncountable early exit store loops with combined conditions (PR #205109)
Graham Hunter via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jun 24 05:49:31 PDT 2026
https://github.com/huntergr-arm updated https://github.com/llvm/llvm-project/pull/205109
>From 9274730ce4f57f795a889380dde53aae74f41049 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Fri, 5 Jun 2026 10:19:21 +0000
Subject: [PATCH 1/2] [LV] Vectorize uncountable early exit store loops with
combined conditions
Support the case where the countable and uncountable exit conditions have
been combined into a single latch branch condition by earlier passes.
---
.../Vectorize/LoopVectorizationLegality.cpp | 67 ++++++++++++------
.../Transforms/Vectorize/LoopVectorize.cpp | 21 ++++++
.../Transforms/Vectorize/VPlanPatternMatch.h | 12 ++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 55 +++++++++++++++
.../VPlan/early_exit_with_stores_vplan.ll | 70 +++++++++++++++++++
.../X86/vectorization-remarks-missed.ll | 18 +++++
.../early_exit_combined_exits.ll | 32 ++++++++-
.../LoopVectorize/early_exit_legality.ll | 5 +-
.../early_exit_store_legality.ll | 2 +-
.../uncountable-single-exit-loops.ll | 2 +
10 files changed, 255 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6813930c76a6d..a233a67f4db39 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1688,16 +1688,30 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
// The latch block must have a countable exit.
- if (isa<SCEVCouldNotCompute>(
- PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
+ if (isa<SCEVCouldNotCompute>(PSE.getSE()->getPredicatedExitCount(
+ TheLoop, LatchBB, &Predicates, ScalarEvolution::SymbolicMaximum))) {
reportVectorizationFailure(
"Cannot determine exact exit count for latch block",
"Cannot vectorize early exit loop",
"UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
return false;
}
- assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
- "Latch block not found in list of countable exits!");
+
+ if (!is_contained(CountableExitingBlocks, LatchBB)) {
+ // If not a separate counted exit in the latch, then check for a combined
+ // countable and uncountable exit.
+ BasicBlock *TrueBB, *FalseBB;
+ // Do we know the IV here?
+ if (!match(LatchBB->getTerminator(),
+ m_Br(m_c_LogicalOr(m_Value(), m_Cmp(m_Add(m_Value(), m_Value()),
+ m_Value())),
+ TrueBB, FalseBB))) {
+ reportVectorizationFailure(
+ "Latch block does not have a countable exit condition",
+ "NoCountableConditionInLatchBlock", ORE, TheLoop);
+ return false;
+ }
+ }
// Check to see if there are instructions that could potentially generate
// exceptions or have side-effects.
@@ -1775,6 +1789,13 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
}
+ // We're only handling combined exit conditions via masking at present, which
+ // is used for loops with side effects.
+ // TODO: Support readonly loops with combined exit conditions.
+ // TODO: Decouple style from the presence of side effects.
+ if (!llvm::is_contained(CountableExitingBlocks, LatchBB) && !HasSideEffects)
+ return false;
+
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
PSE.getSymbolicMaxBackedgeTakenCount();
// Since we have an exact exit count for the latch and the early exit
@@ -1804,9 +1825,16 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
Instruction *L = nullptr;
Value *Ptr = nullptr;
Value *R = nullptr;
- if (!match(Br->getCondition(),
- m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
- m_Value(R))))) {
+ if (!match(
+ Br->getCondition(),
+ m_CombineOr(
+ m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
+ m_Value(R))),
+ m_OneUse(m_LogicalOr(
+ m_OneUse(
+ m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
+ m_Value(R))),
+ m_ICmp(m_Add(m_Value(), m_Value()), m_Value())))))) {
reportVectorizationFailure(
"Early exit loop with store but no supported condition load",
"NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
@@ -1933,24 +1961,17 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return false;
}
- if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
- if (TheLoop->getExitingBlock()) {
+ if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount()) &&
+ !isVectorizableEarlyExitLoop()) {
+ assert(UncountableExitType == UncountableExitTrait::None &&
+ "Must be false without vectorizable early-exit loop");
+ if (TheLoop->getExitingBlock())
reportVectorizationFailure("Cannot vectorize uncountable loop",
"UnsupportedUncountableLoop", ORE, TheLoop);
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- } else {
- if (!isVectorizableEarlyExitLoop()) {
- assert(UncountableExitType == UncountableExitTrait::None &&
- "Must be false without vectorizable early-exit loop");
- if (DoExtraAnalysis)
- Result = false;
- else
- return false;
- }
- }
+ if (DoExtraAnalysis)
+ Result = false;
+ else
+ return false;
}
// Go over each instruction and look at memory deps.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3dbee08e7d7d8..69279a18dd2fd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2706,6 +2706,27 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
if (Legal->hasUncountableEarlyExit() && TheLoop->getLoopLatch() != E)
continue;
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
+ // TODO: This might occur for a multi-exit readonly loop too?
+ // Excluded for now in LVL.
+ // TODO: Do we have the main IV available somewhere? This feels a little
+ // fragile.
+ // If we have an exit condition that is actually two conditions combined
+ // via an or, only add the countable comparison as a uniform value.
+ if (Legal->hasUncountableExitWithSideEffects() &&
+ TheLoop->getLoopLatch() == E) {
+ Value *Uncounted, *Counted, *IV;
+ using namespace llvm::PatternMatch;
+ if (match(Cmp,
+ m_c_LogicalOr(
+ m_Value(Uncounted, m_Cmp(m_Load(m_Value()), m_Value())),
+ m_Value(Counted, m_Cmp(m_Add(m_Value(IV), m_Value()),
+ m_Value()))))) {
+ if (isa<PHINode>(IV)) {
+ AddToWorklistIfAllowed(cast<Instruction>(Counted));
+ continue;
+ }
+ }
+ }
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
AddToWorklistIfAllowed(Cmp);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 1f133cbaa95bb..e66442645b9c7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -203,6 +203,18 @@ inline bind_const_int m_ConstantInt(uint64_t &C) { return C; }
/// Match a VPValue, capturing it if we match.
inline match_bind<VPValue> m_VPValue(VPValue *&V) { return V; }
+/// Match the nested pattern \p P and, on success, capture the value in \p V.
+template <typename Pattern>
+inline auto m_VPValue(VPValue *&V, const Pattern &P) {
+ return m_CombineAnd(P, match_bind<VPValue>(V));
+}
+
+/// Const overload: match \p P and, on success, capture the value in \p V.
+template <typename Pattern>
+inline auto m_VPValue(const VPValue *&V, const Pattern &P) {
+ return m_CombineAnd(P, match_bind<const VPValue>(V));
+}
+
/// Match a VPIRValue.
inline match_bind<VPIRValue> m_VPIRValue(VPIRValue *&V) { return V; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index adcfe30ff9561..61dfe79df23c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4638,6 +4638,61 @@ bool VPlanTransforms::handleUncountableEarlyExits(
}
}
+ // If we didn't find any, perhaps the exit was combined.
+ if (Exits.empty() && Plan.getExitBlocks().size() == 1) {
+ // TODO: Make this work with other styles.
+ if (Style != UncountableExitStyle::MaskedHandleExitInScalarLoop)
+ return false;
+
+ // TODO: Relax assumptions to cover more loops.
+ VPValue *Uncounted = nullptr;
+ VPValue *Counted = nullptr;
+ auto *IV = cast<VPSingleDefRecipe>(&HeaderVPBB->front());
+ VPRecipeBase *LatchBr = LatchVPBB->getTerminator();
+
+ if (!match(
+ LatchBr,
+ m_BranchOnCond(m_c_LogicalOr(
+ m_VPValue(Uncounted,
+ m_Cmp(m_VPInstruction<Instruction::Load>(m_VPValue()),
+ m_VPValue())),
+ m_VPValue(Counted, m_Cmp(m_Add(m_Specific(IV), m_VPValue()),
+ m_VPValue()))))))
+ return false;
+
+ // TODO: Exits currently assumes the ExitBlock must be an existing IR
+ // basic block, and MiddleVPBB doesn't qualify. For now, hack around
+ // this and duplicate the work from below.
+ // TODO: Find a nicer way to integrate this into the rest of the function.
+
+ auto *CondToEarlyExit =
+ LatchBuilder.createNaryOp(VPInstruction::MaskedCond, Uncounted);
+
+ VPValue *IsUncountableExitTaken =
+ LatchBuilder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
+
+ DebugLoc LatchDL = LatchBr->getDebugLoc();
+ VPSingleDefRecipe *LBC = cast<VPSingleDefRecipe>(LatchBr->getOperand(0));
+ LatchBr->eraseFromParent();
+ // Deleting the condition because of the single use restriction...
+ // TODO: Relax single use a bit?
+ LBC->eraseFromParent();
+ LatchBuilder.setInsertPoint(LatchVPBB);
+ LatchBuilder.createNaryOp(VPInstruction::BranchOnTwoConds,
+ {IsUncountableExitTaken, Counted}, LatchDL);
+ // TODO: Are we guaranteed to have the successors in the expected order
+ // at this point?
+ LatchVPBB->clearSuccessors();
+
+ // The exiting lane is handled in the scalar loop, so both exit conditions
+ // of the BranchOnTwoConds created above branch to the middle block.
+ LatchVPBB->setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
+ MiddleVPBB->clearPredecessors();
+ MiddleVPBB->setPredecessors({LatchVPBB, LatchVPBB});
+ return handleUncountableExitsWithSideEffects(
+ Plan, Exits, HeaderVPBB, LatchVPBB, MiddleVPBB, TheLoop, PSE, DT, AC);
+ }
+
assert(!Exits.empty() && "must have at least one early exit");
// Sort exits by RPO order to get correct program order. RPO gives a
// topological ordering of the CFG, ensuring upstream exits are checked
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll b/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll
index 822bc1d4f3d93..7786d869a4394 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll
@@ -270,6 +270,76 @@ exit:
}
define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %src, ptr align 4 dereferenceable(80) noalias %dst, ptr align 4 dereferenceable(80) readonly %pred) {
+; CHECK-LABEL: VPlan for loop in 'combined_exit_conditions'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VP0:%[0-9]+]]> = VF
+; CHECK-NEXT: Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
+; CHECK-NEXT: Live-in ir<20> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vp<[[VP3:%[0-9]+]]> = CANONICAL-IV
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0]]>
+; CHECK-NEXT: vp<[[VP4:%[0-9]+]]> = SCALAR-STEPS vp<[[VP3]]>, ir<1>, vp<[[VP0]]>
+; CHECK-NEXT: CLONE ir<%ee.ptr> = getelementptr inbounds nuw ir<%pred>, vp<[[VP4]]>
+; CHECK-NEXT: vp<[[VP5:%[0-9]+]]> = vector-pointer inbounds nuw ir<%ee.ptr>, ir<1>
+; CHECK-NEXT: WIDEN ir<%ee.val> = load vp<[[VP5]]>
+; CHECK-NEXT: WIDEN ir<%ee.cmp> = icmp ne ir<%ee.val>, ir<0>
+; CHECK-NEXT: EMIT vp<[[VP6:%[0-9]+]]> = first-active-lane ir<%ee.cmp>
+; CHECK-NEXT: EMIT vp<%uncountable.exit.mask> = active lane mask ir<0>, vp<[[VP6]]>, ir<1>
+; CHECK-NEXT: CLONE ir<%src.ptr> = getelementptr ir<%src>, vp<[[VP4]]>
+; CHECK-NEXT: vp<[[VP7:%[0-9]+]]> = vector-pointer ir<%src.ptr>, ir<1>
+; CHECK-NEXT: WIDEN ir<%data> = load vp<[[VP7]]>, vp<%uncountable.exit.mask>
+; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%data>, ir<1>
+; CHECK-NEXT: CLONE ir<%dst.ptr> = getelementptr ir<%dst>, vp<[[VP4]]>
+; CHECK-NEXT: vp<[[VP8:%[0-9]+]]> = vector-pointer ir<%dst.ptr>, ir<1>
+; CHECK-NEXT: WIDEN store vp<[[VP8]]>, ir<%add>, vp<%uncountable.exit.mask>
+; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = any-of ir<%ee.cmp>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
+; CHECK-NEXT: EMIT vp<[[VP10:%[0-9]+]]> = icmp eq vp<%index.next>, vp<[[VP2]]>
+; CHECK-NEXT: EMIT branch-on-two-conds vp<[[VP9]]>, vp<[[VP10]]>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block, middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<[[VP12:%[0-9]+]]> = extract-lane ir<0>, ir<%iv>
+; CHECK-NEXT: EMIT vp<[[VP13:%[0-9]+]]> = add vp<[[VP12]]>, vp<[[VP6]]>
+; CHECK-NEXT: EMIT vp<[[VP14:%[0-9]+]]> = icmp eq vp<[[VP13]]>, ir<20>
+; CHECK-NEXT: EMIT branch-on-cond vp<[[VP14]]>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP13]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %src.ptr = getelementptr inbounds nuw [4 x i8], ptr %src, i64 %iv
+; CHECK-NEXT: IR %data = load i32, ptr %src.ptr, align 4
+; CHECK-NEXT: IR %add = add nsw i32 %data, 1
+; CHECK-NEXT: IR %dst.ptr = getelementptr inbounds nuw [4 x i8], ptr %dst, i64 %iv
+; CHECK-NEXT: IR store i32 %add, ptr %dst.ptr, align 4
+; CHECK-NEXT: IR %ee.ptr = getelementptr inbounds nuw [4 x i8], ptr %pred, i64 %iv
+; CHECK-NEXT: IR %ee.val = load i32, ptr %ee.ptr, align 4
+; CHECK-NEXT: IR %ee.cmp = icmp ne i32 %ee.val, 0
+; CHECK-NEXT: IR %iv.next = add nuw nsw i64 %iv, 1
+; CHECK-NEXT: IR %counted.cmp = icmp eq i64 %iv.next, 20
+; CHECK-NEXT: IR %combined.cond = select i1 %ee.cmp, i1 true, i1 %counted.cmp
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 5f2a93b230995..f233f71d93f9c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -48,6 +48,15 @@
; YAML: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: NoCountableConditionInLatchBlock
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 5, Column: 9 }
+; YAML-NEXT: Function: _Z4testPii
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: Latch block does not have a countable exit condition
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
; YAML-NEXT: Name: UnsupportedUncountableLoop
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 5, Column: 9 }
; YAML-NEXT: Function: _Z4testPii
@@ -137,6 +146,15 @@
; YAML-NEXT: ...
; YAML-NEXT: --- !Analysis
; YAML-NEXT: Pass: loop-vectorize
+; YAML-NEXT: Name: RecurrencesInEarlyExitLoop
+; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function: test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT: - String: 'loop not vectorized: '
+; YAML-NEXT: - String: Cannot vectorize early exit loop with reductions or recurrences
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass: loop-vectorize
; YAML-NEXT: Name: UnsupportedUncountableLoop
; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 27, Column: 3 }
; YAML-NEXT: Function: test_multiple_failures
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll b/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll
index 43a62e19eb0c3..ad844ce816e80 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll
@@ -4,10 +4,36 @@
define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %src, ptr align 4 dereferenceable(80) noalias %dst, ptr align 4 dereferenceable(80) readonly %pred) {
; CHECK-LABEL: define void @combined_exit_conditions(
; CHECK-SAME: ptr readonly align 4 dereferenceable(80) [[SRC:%.*]], ptr noalias align 4 dereferenceable(80) [[DST:%.*]], ptr readonly align 4 dereferenceable(80) [[PRED:%.*]]) {
-; CHECK-NEXT: [[SCALAR_PH:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i8], ptr [[PRED]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
+; CHECK-NEXT: [[UNCOUNTABLE_EXIT_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP3]])
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP0]], <4 x i1> [[UNCOUNTABLE_EXIT_MASK]], <4 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [4 x i8], ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[UNCOUNTABLE_EXIT_MASK]])
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
+; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[TMP10]], 20
+; CHECK-NEXT: br i1 [[TMP11]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]]
+; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: br label %[[FOR_BODY1:.*]]
; CHECK: [[FOR_BODY1]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY1]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[TMP10]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY1]] ]
; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds nuw [4 x i8], ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[DATA:%.*]] = load i32, ptr [[SRC_PTR]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[DATA]], 1
@@ -19,7 +45,7 @@ define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[COUNTED_CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 20
; CHECK-NEXT: [[COMBINED_COND:%.*]] = select i1 [[EE_CMP]], i1 true, i1 [[COUNTED_CMP]]
-; CHECK-NEXT: br i1 [[COMBINED_COND]], label %[[EXIT:.*]], label %[[FOR_BODY1]]
+; CHECK-NEXT: br i1 [[COMBINED_COND]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
index 88a2bccf9c904..69d28fec87cde 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
@@ -383,7 +383,7 @@ loop.end:
define i64 @uncountable_exit_infinite_loop() {
; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_infinite_loop'
-; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
+; CHECK: LV: Not vectorizing: Cannot determine exact exit count for latch block.
entry:
%p1 = alloca [1024 x i8]
%p2 = alloca [1024 x i8]
@@ -470,7 +470,8 @@ loop.end:
define void @exit_conditions_combined_in_single_branch(ptr noalias dereferenceable(40) %array, ptr readonly align 2 dereferenceable(40) %pred) {
; CHECK-LABEL: LV: Checking a loop in 'exit_conditions_combined_in_single_branch'
-; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
+; CHECK: LV: We can vectorize this loop!
+; CHECK: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit and side effects is not enabled.
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
index 9b2d26ae84102..36082c61296e2 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll
@@ -937,7 +937,7 @@ invalid.block:
define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %src, ptr align 4 dereferenceable(80) noalias %dst, ptr align 4 dereferenceable(80) readonly %pred) {
; CHECK-LABEL: LV: Checking a loop in 'combined_exit_conditions'
-; CHECK: LV: Not vectorizing: Cannot vectorize uncountable loop.
+; CHECK: LV: We can vectorize this loop!
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll b/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll
index e7ecf459ca6c9..327c9f1668854 100644
--- a/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll
@@ -4,12 +4,14 @@
; CHECK-LABEL: LV: Checking a loop in 'latch_exit_cannot_compute_btc_due_to_step'
; CHECK: LV: Did not find one integer induction var.
+; CHECK-NEXT: LV: Not vectorizing: Cannot determine exact exit count for latch block.
; CHECK-NEXT: LV: Not vectorizing: Cannot vectorize uncountable loop.
; CHECK-NEXT: LV: Not vectorizing: Cannot prove legality.
; CHECK-LABEL: LV: Checking a loop in 'header_exit_cannot_compute_btc_due_to_step'
; CHECK: LV: Found an induction variable.
; CHECK-NEXT: LV: Did not find one integer induction var.
+; CHECK-NEXT: LV: Not vectorizing: Cannot determine exact exit count for latch block.
; CHECK-NEXT: LV: Not vectorizing: Cannot vectorize uncountable loop.
; CHECK-NEXT: LV: Not vectorizing: Cannot prove legality.
>From a2f61a932aafba4fe9cb8a01b0c334fea56d08d6 Mon Sep 17 00:00:00 2001
From: Graham Hunter <graham.hunter at arm.com>
Date: Wed, 24 Jun 2026 12:48:35 +0000
Subject: [PATCH 2/2] Rebase to show phase ordering test vectorizing
---
...ountable-and-uncountable-exits-combined.ll | 40 +++++++++++++++++--
1 file changed, 36 insertions(+), 4 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/countable-and-uncountable-exits-combined.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/countable-and-uncountable-exits-combined.ll
index 2b2f0fbc87762..475e32da9c419 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/countable-and-uncountable-exits-combined.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/countable-and-uncountable-exits-combined.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
+; RUN: opt -passes="default<O3>" -enable-early-exit-vectorization-with-side-effects -S %s | FileCheck %s
target triple = "aarch64"
@@ -26,9 +26,41 @@ define void @foo() #0 {
; CHECK-LABEL: define void @foo(
; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP0]], 2500
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP1]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 10000, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4 x i8], ptr @c, i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <vscale x 4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD_FR]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP3]], i1 false)
+; CHECK-NEXT: [[UNCOUNTABLE_EXIT_MASK:%.*]] = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP4]])
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [4 x i8], ptr @src, i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP5]], <vscale x 4 x i1> [[UNCOUNTABLE_EXIT_MASK]], <vscale x 4 x i32> poison)
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 42)
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [4 x i8], ptr @dst, i64 [[INDEX]]
+; CHECK-NEXT: tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP6]], ptr align 4 [[TMP7]], <vscale x 4 x i1> [[UNCOUNTABLE_EXIT_MASK]])
+; CHECK-NEXT: [[TMP8:%.*]] = tail call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP3]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[TMP11]], 10000
+; CHECK-NEXT: br i1 [[TMP12]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY1:.*]]
+; CHECK: [[FOR_BODY1]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x i8], ptr @src, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[SRC_PTR:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SRC_PTR]], 42
@@ -40,7 +72,7 @@ define void @foo() #0 {
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[EE_COND]], i1 true, i1 [[EXITCOND_NOT]]
-; CHECK-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
More information about the llvm-branch-commits
mailing list