[llvm-branch-commits] [llvm] [LV] Vectorize uncountable early exit store loops with combined conditions (PR #205109)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jun 22 06:53:01 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Graham Hunter (huntergr-arm)

<details>
<summary>Changes</summary>

Support the case where earlier passes have combined the countable and uncountable exit conditions into a single latch branch condition (a logical OR of the two compares).

Test PR: #<!-- -->205104

---

Patch is 23.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/205109.diff


10 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+44-23) 
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+21) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h (+12) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+55) 
- (modified) llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll (+70) 
- (modified) llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll (+18) 
- (modified) llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll (+29-3) 
- (modified) llvm/test/Transforms/LoopVectorize/early_exit_legality.ll (+3-2) 
- (modified) llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/uncountable-single-exit-loops.ll (+2) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6813930c76a6d..a233a67f4db39 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1688,16 +1688,30 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   }
 
   // The latch block must have a countable exit.
-  if (isa<SCEVCouldNotCompute>(
-          PSE.getSE()->getPredicatedExitCount(TheLoop, LatchBB, &Predicates))) {
+  if (isa<SCEVCouldNotCompute>(PSE.getSE()->getPredicatedExitCount(
+          TheLoop, LatchBB, &Predicates, ScalarEvolution::SymbolicMaximum))) {
     reportVectorizationFailure(
         "Cannot determine exact exit count for latch block",
         "Cannot vectorize early exit loop",
         "UnknownLatchExitCountEarlyExitLoop", ORE, TheLoop);
     return false;
   }
-  assert(llvm::is_contained(CountableExitingBlocks, LatchBB) &&
-         "Latch block not found in list of countable exits!");
+
+  if (!is_contained(CountableExitingBlocks, LatchBB)) {
+    // If not a separate counted exit in the latch, then check for a combined
+    // countable and uncountable exit.
+    BasicBlock *TrueBB, *FalseBB;
+    // Do we know the IV here?
+    if (!match(LatchBB->getTerminator(),
+               m_Br(m_c_LogicalOr(m_Value(), m_Cmp(m_Add(m_Value(), m_Value()),
+                                                   m_Value())),
+                    TrueBB, FalseBB))) {
+      reportVectorizationFailure(
+          "Latch block does not have a countable exit condition",
+          "NoCountableConditionInLatchBlock", ORE, TheLoop);
+      return false;
+    }
+  }
 
   // Check to see if there are instructions that could potentially generate
   // exceptions or have side-effects.
@@ -1775,6 +1789,13 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
     }
   }
 
+  // We're only handling combined exit conditions via masking at present, which
+  // is used for loops with side effects.
+  // TODO: Support readonly loops with combined exit conditions.
+  // TODO: Decouple style from the presence of side effects.
+  if (!llvm::is_contained(CountableExitingBlocks, LatchBB) && !HasSideEffects)
+    return false;
+
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
   // Since we have an exact exit count for the latch and the early exit
@@ -1804,9 +1825,16 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved(
   Instruction *L = nullptr;
   Value *Ptr = nullptr;
   Value *R = nullptr;
-  if (!match(Br->getCondition(),
-             m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
-                             m_Value(R))))) {
+  if (!match(
+          Br->getCondition(),
+          m_CombineOr(
+              m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
+                              m_Value(R))),
+              m_OneUse(m_LogicalOr(
+                  m_OneUse(
+                      m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))),
+                             m_Value(R))),
+                  m_ICmp(m_Add(m_Value(), m_Value()), m_Value())))))) {
     reportVectorizationFailure(
         "Early exit loop with store but no supported condition load",
         "NoConditionLoadForEarlyExitLoop", ORE, TheLoop);
@@ -1933,24 +1961,17 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
       return false;
   }
 
-  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
-    if (TheLoop->getExitingBlock()) {
+  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount()) &&
+      !isVectorizableEarlyExitLoop()) {
+    assert(UncountableExitType == UncountableExitTrait::None &&
+           "Must be false without vectorizable early-exit loop");
+    if (TheLoop->getExitingBlock())
       reportVectorizationFailure("Cannot vectorize uncountable loop",
                                  "UnsupportedUncountableLoop", ORE, TheLoop);
-      if (DoExtraAnalysis)
-        Result = false;
-      else
-        return false;
-    } else {
-      if (!isVectorizableEarlyExitLoop()) {
-        assert(UncountableExitType == UncountableExitTrait::None &&
-               "Must be false without vectorizable early-exit loop");
-        if (DoExtraAnalysis)
-          Result = false;
-        else
-          return false;
-      }
-    }
+    if (DoExtraAnalysis)
+      Result = false;
+    else
+      return false;
   }
 
   // Go over each instruction and look at memory deps.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3dbee08e7d7d8..69279a18dd2fd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2706,6 +2706,27 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
     if (Legal->hasUncountableEarlyExit() && TheLoop->getLoopLatch() != E)
       continue;
     auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
+    // TODO: This might occur for a multi-exit readonly loop too?
+    //       Excluded for now in LVL.
+    // TODO: Do we have the main IV available somewhere? this feels a little
+    //       fragile.
+    // If we have an exit condition that is actually two conditions combined
+    // via an or, only add the countable comparison as a uniform value.
+    if (Legal->hasUncountableExitWithSideEffects() &&
+        TheLoop->getLoopLatch() == E) {
+      Value *Uncounted, *Counted, *IV;
+      using namespace llvm::PatternMatch;
+      if (match(Cmp,
+                m_c_LogicalOr(
+                    m_Value(Uncounted, m_Cmp(m_Load(m_Value()), m_Value())),
+                    m_Value(Counted, m_Cmp(m_Add(m_Value(IV), m_Value()),
+                                           m_Value()))))) {
+        if (isa<PHINode>(IV)) {
+          AddToWorklistIfAllowed(cast<Instruction>(Counted));
+          continue;
+        }
+      }
+    }
     if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
       AddToWorklistIfAllowed(Cmp);
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 1f133cbaa95bb..e66442645b9c7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -203,6 +203,18 @@ inline bind_const_int m_ConstantInt(uint64_t &C) { return C; }
 /// Match a VPValue, capturing it if we match.
 inline match_bind<VPValue> m_VPValue(VPValue *&V) { return V; }
 
+/// Match against the nested pattern, and capture the value if we match.
+template <typename Pattern>
+inline auto m_VPValue(VPValue *&V, const Pattern &P) {
+  return m_CombineAnd(P, match_bind<VPValue>(V));
+}
+
+/// Match against the nested pattern, and capture the value if we match.
+template <typename Pattern>
+inline auto m_VPValue(const VPValue *&V, const Pattern &P) {
+  return m_CombineAnd(P, match_bind<const VPValue>(V));
+}
+
 /// Match a VPIRValue.
 inline match_bind<VPIRValue> m_VPIRValue(VPIRValue *&V) { return V; }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index adcfe30ff9561..61dfe79df23c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4638,6 +4638,61 @@ bool VPlanTransforms::handleUncountableEarlyExits(
     }
   }
 
+  // If we didn't find any, perhaps the exit was combined.
+  if (Exits.empty() && Plan.getExitBlocks().size() == 1) {
+    // TODO: Make this work with other styles.
+    if (Style != UncountableExitStyle::MaskedHandleExitInScalarLoop)
+      return false;
+
+    // TODO: Relax assumptions to cover more loops.
+    VPValue *Uncounted = nullptr;
+    VPValue *Counted = nullptr;
+    auto *IV = cast<VPSingleDefRecipe>(&HeaderVPBB->front());
+    VPRecipeBase *LatchBr = LatchVPBB->getTerminator();
+
+    if (!match(
+            LatchBr,
+            m_BranchOnCond(m_c_LogicalOr(
+                m_VPValue(Uncounted,
+                          m_Cmp(m_VPInstruction<Instruction::Load>(m_VPValue()),
+                                m_VPValue())),
+                m_VPValue(Counted, m_Cmp(m_Add(m_Specific(IV), m_VPValue()),
+                                         m_VPValue()))))))
+      return false;
+
+    // TODO: Exits currently assumes the ExitBlock must be an existing IR
+    //       basic block, and MiddleVPBB doesn't qualify. For now, hack around
+    //       this and duplicate the work from below.
+    // TODO: Find a nicer way to integrate this into the rest of the function.
+
+    auto *CondToEarlyExit =
+        LatchBuilder.createNaryOp(VPInstruction::MaskedCond, Uncounted);
+
+    VPValue *IsUncountableExitTaken =
+        LatchBuilder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
+
+    DebugLoc LatchDL = LatchBr->getDebugLoc();
+    VPSingleDefRecipe *LBC = cast<VPSingleDefRecipe>(LatchBr->getOperand(0));
+    LatchBr->eraseFromParent();
+    // Deleting the condition because of the single use restriction...
+    // TODO: Relax single use a bit?
+    LBC->eraseFromParent();
+    LatchBuilder.setInsertPoint(LatchVPBB);
+    LatchBuilder.createNaryOp(VPInstruction::BranchOnTwoConds,
+                              {IsUncountableExitTaken, Counted}, LatchDL);
+    // TODO: Are we guaranteed to have the successors in the expected order
+    //       at this point?
+    LatchVPBB->clearSuccessors();
+
+    // If handling the exiting lane in the scalar loop, combine the exit
+    // conditions into a single BranchOnCond.
+    LatchVPBB->setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
+    MiddleVPBB->clearPredecessors();
+    MiddleVPBB->setPredecessors({LatchVPBB, LatchVPBB});
+    return handleUncountableExitsWithSideEffects(
+        Plan, Exits, HeaderVPBB, LatchVPBB, MiddleVPBB, TheLoop, PSE, DT, AC);
+  }
+
   assert(!Exits.empty() && "must have at least one early exit");
   // Sort exits by RPO order to get correct program order. RPO gives a
   // topological ordering of the CFG, ensuring upstream exits are checked
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll b/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll
index 822bc1d4f3d93..7786d869a4394 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/early_exit_with_stores_vplan.ll
@@ -270,6 +270,76 @@ exit:
 }
 
 define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %src, ptr align 4 dereferenceable(80) noalias %dst, ptr align 4 dereferenceable(80) readonly %pred) {
+; CHECK-LABEL: VPlan for loop in 'combined_exit_conditions'
+; CHECK:  VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<[[VP0:%[0-9]+]]> = VF
+; CHECK-NEXT:  Live-in vp<[[VP1:%[0-9]+]]> = VF * UF
+; CHECK-NEXT:  Live-in vp<[[VP2:%[0-9]+]]> = vector-trip-count
+; CHECK-NEXT:  Live-in ir<20> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<entry>:
+; CHECK-NEXT:  Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT:  vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT:  <x1> vector loop: {
+; CHECK-NEXT:  vp<[[VP3:%[0-9]+]]> = CANONICAL-IV
+; CHECK-EMPTY:
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:      ir<%iv> = WIDEN-INDUCTION nuw nsw ir<0>, ir<1>, vp<[[VP0]]>
+; CHECK-NEXT:      vp<[[VP4:%[0-9]+]]> = SCALAR-STEPS vp<[[VP3]]>, ir<1>, vp<[[VP0]]>
+; CHECK-NEXT:      CLONE ir<%ee.ptr> = getelementptr inbounds nuw ir<%pred>, vp<[[VP4]]>
+; CHECK-NEXT:      vp<[[VP5:%[0-9]+]]> = vector-pointer inbounds nuw ir<%ee.ptr>, ir<1>
+; CHECK-NEXT:      WIDEN ir<%ee.val> = load vp<[[VP5]]>
+; CHECK-NEXT:      WIDEN ir<%ee.cmp> = icmp ne ir<%ee.val>, ir<0>
+; CHECK-NEXT:      EMIT vp<[[VP6:%[0-9]+]]> = first-active-lane ir<%ee.cmp>
+; CHECK-NEXT:      EMIT vp<%uncountable.exit.mask> = active lane mask ir<0>, vp<[[VP6]]>, ir<1>
+; CHECK-NEXT:      CLONE ir<%src.ptr> = getelementptr ir<%src>, vp<[[VP4]]>
+; CHECK-NEXT:      vp<[[VP7:%[0-9]+]]> = vector-pointer ir<%src.ptr>, ir<1>
+; CHECK-NEXT:      WIDEN ir<%data> = load vp<[[VP7]]>, vp<%uncountable.exit.mask>
+; CHECK-NEXT:      WIDEN ir<%add> = add nsw ir<%data>, ir<1>
+; CHECK-NEXT:      CLONE ir<%dst.ptr> = getelementptr ir<%dst>, vp<[[VP4]]>
+; CHECK-NEXT:      vp<[[VP8:%[0-9]+]]> = vector-pointer ir<%dst.ptr>, ir<1>
+; CHECK-NEXT:      WIDEN store vp<[[VP8]]>, ir<%add>, vp<%uncountable.exit.mask>
+; CHECK-NEXT:      EMIT vp<[[VP9:%[0-9]+]]> = any-of ir<%ee.cmp>
+; CHECK-NEXT:      EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1]]>
+; CHECK-NEXT:      EMIT vp<[[VP10:%[0-9]+]]> = icmp eq vp<%index.next>, vp<[[VP2]]>
+; CHECK-NEXT:      EMIT branch-on-two-conds vp<[[VP9]]>, vp<[[VP10]]>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block, middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    EMIT vp<[[VP12:%[0-9]+]]> = extract-lane ir<0>, ir<%iv>
+; CHECK-NEXT:    EMIT vp<[[VP13:%[0-9]+]]> = add vp<[[VP12]]>, vp<[[VP6]]>
+; CHECK-NEXT:    EMIT vp<[[VP14:%[0-9]+]]> = icmp eq vp<[[VP13]]>, ir<20>
+; CHECK-NEXT:    EMIT branch-on-cond vp<[[VP14]]>
+; CHECK-NEXT:  Successor(s): ir-bb<exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<exit>:
+; CHECK-NEXT:  No successors
+; CHECK-EMPTY:
+; CHECK-NEXT:  scalar.ph:
+; CHECK-NEXT:    EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<[[VP13]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT:  Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT:  ir-bb<for.body>:
+; CHECK-NEXT:    IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT:    IR   %src.ptr = getelementptr inbounds nuw [4 x i8], ptr %src, i64 %iv
+; CHECK-NEXT:    IR   %data = load i32, ptr %src.ptr, align 4
+; CHECK-NEXT:    IR   %add = add nsw i32 %data, 1
+; CHECK-NEXT:    IR   %dst.ptr = getelementptr inbounds nuw [4 x i8], ptr %dst, i64 %iv
+; CHECK-NEXT:    IR   store i32 %add, ptr %dst.ptr, align 4
+; CHECK-NEXT:    IR   %ee.ptr = getelementptr inbounds nuw [4 x i8], ptr %pred, i64 %iv
+; CHECK-NEXT:    IR   %ee.val = load i32, ptr %ee.ptr, align 4
+; CHECK-NEXT:    IR   %ee.cmp = icmp ne i32 %ee.val, 0
+; CHECK-NEXT:    IR   %iv.next = add nuw nsw i64 %iv, 1
+; CHECK-NEXT:    IR   %counted.cmp = icmp eq i64 %iv.next, 20
+; CHECK-NEXT:    IR   %combined.cond = select i1 %ee.cmp, i1 true, i1 %counted.cmp
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+;
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 5f2a93b230995..f233f71d93f9c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -48,6 +48,15 @@
 
 ; YAML:       --- !Analysis
 ; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            NoCountableConditionInLatchBlock
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 5, Column: 9 }
+; YAML-NEXT: Function:        _Z4testPii
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          Latch block does not have a countable exit condition
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
 ; YAML-NEXT: Name:            UnsupportedUncountableLoop
 ; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 5, Column: 9 }
 ; YAML-NEXT: Function:        _Z4testPii
@@ -137,6 +146,15 @@
 ; YAML-NEXT: ...
 ; YAML-NEXT: --- !Analysis
 ; YAML-NEXT: Pass:            loop-vectorize
+; YAML-NEXT: Name:            RecurrencesInEarlyExitLoop
+; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
+; YAML-NEXT: Function:        test_multiple_failures
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'loop not vectorized: '
+; YAML-NEXT:   - String:          Cannot vectorize early exit loop with reductions or recurrences
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Analysis
+; YAML-NEXT: Pass:            loop-vectorize
 ; YAML-NEXT: Name:            UnsupportedUncountableLoop
 ; YAML-NEXT: DebugLoc:        { File: source.cpp, Line: 27, Column: 3 }
 ; YAML-NEXT: Function:        test_multiple_failures
diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll b/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll
index 43a62e19eb0c3..ad844ce816e80 100644
--- a/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll
+++ b/llvm/test/Transforms/LoopVectorize/early_exit_combined_exits.ll
@@ -4,10 +4,36 @@
 define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %src, ptr align 4 dereferenceable(80) noalias %dst, ptr align 4 dereferenceable(80) readonly %pred) {
 ; CHECK-LABEL: define void @combined_exit_conditions(
 ; CHECK-SAME: ptr readonly align 4 dereferenceable(80) [[SRC:%.*]], ptr noalias align 4 dereferenceable(80) [[DST:%.*]], ptr readonly align 4 dereferenceable(80) [[PRED:%.*]]) {
-; CHECK-NEXT:  [[SCALAR_PH:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw [4 x i8], ptr [[PRED]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
+; CHECK-NEXT:    [[UNCOUNTABLE_EXIT_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP3]])
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP0]], <4 x i1> [[UNCOUNTABLE_EXIT_MASK]], <4 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [4 x i8], ptr [[DST]], i64 [[INDEX]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[UNCOUNTABLE_EXIT_MASK]])
+; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
+; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP3]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[TMP10]], 20
+; CHECK-NEXT:    br i1 [[TMP11]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]]
+; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    br label %[[FOR_BODY1:.*]]
 ; CHECK:       [[FOR_BODY1]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY1]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[TMP10]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY1]] ]
 ; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds nuw [4 x i8], ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT:    [[DATA:%.*]] = load i32, ptr [[SRC_PTR]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[DATA]], 1
@@ -19,7 +45,7 @@ define void @combined_exit_conditions(ptr align 4 dereferenceable(80) readonly %
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[COUNTED_CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 20
 ; CHECK-NEXT:    [[COMBINED_COND:%.*]] = select i1 [[EE_CMP]], i1 true, i1 [[COUNTED_CMP]]
-; CHECK-NEXT:    br i1 [[COMBINED_COND]], label %[[EXIT:.*]], label %[[FOR_BODY1]]
+; CHECK-NEXT:    br i1 [[COMBINED_COND]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
diff -...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/205109


More information about the llvm-branch-commits mailing list