[llvm] [AArch64] Add flag to control unrolling for small multi-exit loops (PR #131998)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 20 06:32:20 PDT 2025


https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/131998

>From 64c9960791297b6137f3dac59ed1d9a33f140d67 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 19 Mar 2025 10:22:25 +0000
Subject: [PATCH 1/2] [AArch64] Add flag to control unrolling for small
 multi-exit loops

It can be highly beneficial to unroll small, two-block search loops
that look for a value in an array. An example of this would be
code that uses libc++'s std::find to search for a value. Older
versions of std::find in the libstdc++ headers are manually unrolled
in the source code, but this might change in newer releases where
the compiler is expected to either vectorise or unroll the loop itself.

This patch adds a new flag -small-multi-exit-loop-unroll-factor
that controls the amount of unrolling for such loops. This is
currently off by default, but in a future patch I plan to enable
this for some targets along with details of any performance
improvements.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  90 ++-
 .../AArch64/unrolling-multi-exit.ll           | 713 ++++++++++++++++++
 2 files changed, 793 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7cec8a17dfaaa..d3850217f1358 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -68,6 +68,11 @@ static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
 static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
                                       cl::init(true), cl::Hidden);
 
+static cl::opt<unsigned> SmallMultiExitLoopUF(
+    "small-multi-exit-loop-unroll-factor", cl::init(0), cl::Hidden,
+    cl::desc(
+        "Force unrolling of small multi-exit loops with given unroll factor"));
+
 // A complete guess as to a reasonable cost.
 static cl::opt<unsigned>
     BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
@@ -4237,6 +4242,70 @@ getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   }
 }
 
+static bool shouldUnrollLoopWithInstruction(Instruction &I,
+                                            AArch64TTIImpl &TTI) {
+  // Reject vectorised loops: a vector-typed instruction rules out unrolling.
+  if (I.getType()->isVectorTy())
+    return false;
+
+  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+    if (const Function *F = cast<CallBase>(I).getCalledFunction())
+      if (!TTI.isLoweredToCall(F))
+        return true; // The callee is not lowered to a call, so allow it.
+    return false; // No known callee, or one that is lowered to a call.
+  }
+
+  return true; // Every other instruction is acceptable.
+}
+
+static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
+                                           AArch64TTIImpl &TTI) {
+  // Small search loops with multiple exits can be highly beneficial to unroll.
+  // We only care about two-block loops with exactly two exiting blocks,
+  // although each block could jump to the same exit block.
+  SmallVector<BasicBlock *> Blocks(L->getBlocks());
+  if (Blocks.size() != 2 || L->getExitingBlock())
+    return false;
+
+  if (any_of(Blocks, [](BasicBlock *BB) {
+        return !isa<BranchInst>(BB->getTerminator());
+      }))
+    return false; // Some block does not end in a branch instruction.
+
+  // Only consider loops with unknown trip counts for which we can determine
+  // a symbolic expression. Multi-exit loops with small known trip counts will
+  // likely be unrolled anyway.
+  const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+  if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
+    return false;
+
+  // It might not be worth unrolling loops with low max trip counts. Restrict
+  // this to max trip counts > 32 for now; a MaxTC of 0 is also let through.
+  unsigned MaxTC = SE.getSmallConstantMaxTripCount(L);
+  if (MaxTC > 0 && MaxTC <= 32)
+    return false;
+
+  // Estimate the size of the loop by summing TCK_CodeSize instruction costs.
+  int64_t Size = 0;
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      if (!shouldUnrollLoopWithInstruction(I, TTI))
+        return false;
+
+      SmallVector<const Value *, 4> Operands(I.operand_values());
+      InstructionCost Cost =
+          TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
+      // This can happen with intrinsics that don't currently have a cost model
+      // or for some operations that require SVE.
+      if (!Cost.isValid())
+        return false;
+      Size += *Cost.getValue();
+    }
+  }
+
+  return Size < 6; // Accept only loops whose summed cost is below 6.
+}
+
 /// For Apple CPUs, we want to runtime-unroll loops to make better use if the
 /// OOO engine's wide instruction window and various predictors.
 static void
@@ -4412,22 +4481,23 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     break;
   }
 
+  if (SmallMultiExitLoopUF && shouldUnrollSmallMultiExitLoop(L, SE, *this)) {
+    UP.RuntimeUnrollMultiExit = true;
+    UP.Runtime = true;
+    // Limit unroll count.
+    UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUF;
+    // Allow slightly more costly trip-count expansion to catch search loops
+    // with pointer inductions.
+    UP.SCEVExpansionBudget = 5;
+  }
+
   // Scan the loop: don't unroll loops with calls as this could prevent
   // inlining. Don't unroll vector loops either, as they don't benefit much from
   // unrolling.
   for (auto *BB : L->getBlocks()) {
     for (auto &I : *BB) {
-      // Don't unroll vectorised loop.
-      if (I.getType()->isVectorTy())
+      if (!shouldUnrollLoopWithInstruction(I, *this))
         return;
-
-      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
-        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
-          if (!isLoweredToCall(F))
-            continue;
-        }
-        return;
-      }
     }
   }
 
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
new file mode 100644
index 0000000000000..b799b4328400a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
@@ -0,0 +1,713 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-unroll -mcpu=generic -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefixes=COMMON,UNROLL2 %s
+; RUN: opt -p loop-unroll -mcpu=generic -S %s | FileCheck --check-prefixes=COMMON,GENERIC %s
+
+target triple = "aarch64-linux-gnu"
+
+define i1 @multi_2_exiting_find_i8_loop_same_exit(ptr %vec, i8 %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_i8_loop_same_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT:    [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT:    [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT:    [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2:       [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT:    br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2:       [[ENTRY_NEW]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT:    [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT:    [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2:       [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT:    [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2:       [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT:    br label %[[EXIT]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_3]]
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_i8_loop_same_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GENERIC-NEXT:  [[ENTRY:.*]]:
+; GENERIC-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; GENERIC-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT:    br label %[[LOOP_HEADER:.*]]
+; GENERIC:       [[LOOP_HEADER]]:
+; GENERIC-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; GENERIC-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC:       [[LOOP_LATCH]]:
+; GENERIC-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; GENERIC-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; GENERIC:       [[EXIT]]:
+; GENERIC-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; GENERIC-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; GENERIC-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %c.1 = icmp eq i8 %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_diff_exit(ptr %vec, i8 %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_i8_loop_diff_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT:    [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT:    [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT:    [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_PROL]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2:       [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT:    br i1 [[TMP3]], label %[[LATCH_EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2:       [[ENTRY_NEW]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EARLY_EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT:    [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT:    [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_1]], label %[[EARLY_EXIT_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2:       [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT:    [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2_1]], label %[[LATCH_EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EARLY_EXIT_LOOPEXIT]]:
+; UNROLL2-NEXT:    br label %[[EARLY_EXIT]]
+; UNROLL2:       [[EARLY_EXIT]]:
+; UNROLL2-NEXT:    ret i1 true
+; UNROLL2:       [[LATCH_EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT:    br label %[[LATCH_EXIT]]
+; UNROLL2:       [[LATCH_EXIT]]:
+; UNROLL2-NEXT:    ret i1 false
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_i8_loop_diff_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT:  [[ENTRY:.*]]:
+; GENERIC-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; GENERIC-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT:    br label %[[LOOP_HEADER:.*]]
+; GENERIC:       [[LOOP_HEADER]]:
+; GENERIC-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; GENERIC-NEXT:    br i1 [[C_1]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC:       [[LOOP_LATCH]]:
+; GENERIC-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; GENERIC-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT:    br i1 [[C_2]], label %[[LATCH_EXIT:.*]], label %[[LOOP_HEADER]]
+; GENERIC:       [[EARLY_EXIT]]:
+; GENERIC-NEXT:    ret i1 true
+; GENERIC:       [[LATCH_EXIT]]:
+; GENERIC-NEXT:    ret i1 false
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %c.1 = icmp eq i8 %l, %tgt
+  br i1 %c.1, label %early.exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %latch.exit, label %loop.header
+
+early.exit:
+  ret i1 1
+
+latch.exit:
+  ret i1 0
+}
+
+
+define i1 @multi_2_exiting_find_ptr_loop_same_exit(ptr %vec, ptr %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_ptr_loop_same_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
+; UNROLL2-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; UNROLL2-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; UNROLL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; UNROLL2-NEXT:    [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; UNROLL2-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; UNROLL2-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
+; UNROLL2-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT:    [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
+; UNROLL2-NEXT:    [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2:       [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
+; UNROLL2-NEXT:    br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2:       [[ENTRY_NEW]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT:    [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT:    [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2:       [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
+; UNROLL2-NEXT:    [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2:       [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT:    br label %[[EXIT]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_3]]
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_ptr_loop_same_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT:  [[ENTRY:.*]]:
+; GENERIC-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; GENERIC-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; GENERIC-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; GENERIC-NEXT:    br label %[[LOOP_HEADER:.*]]
+; GENERIC:       [[LOOP_HEADER]]:
+; GENERIC-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; GENERIC-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC:       [[LOOP_LATCH]]:
+; GENERIC-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; GENERIC-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; GENERIC:       [[EXIT]]:
+; GENERIC-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; GENERIC-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; GENERIC-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; GENERIC-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+  %end = load ptr, ptr %gep.end, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load ptr, ptr %ptr.iv, align 8
+  %c.1 = icmp eq ptr %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+
+define ptr @multi_2_exiting_find_ptr_loop_diff_exit(ptr %vec, ptr %tgt) {
+; UNROLL2-LABEL: define ptr @multi_2_exiting_find_ptr_loop_diff_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[START3:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    [[END2:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    [[TMP0:%.*]] = add i64 [[END2]], -8
+; UNROLL2-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START3]]
+; UNROLL2-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; UNROLL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; UNROLL2-NEXT:    [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; UNROLL2-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; UNROLL2-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
+; UNROLL2-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT:    [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
+; UNROLL2-NEXT:    [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_PROL]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2:       [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
+; UNROLL2-NEXT:    br i1 [[TMP6]], label %[[LATCH_EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2:       [[ENTRY_NEW]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EARLY_EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT:    [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT:    [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_1]], label %[[EARLY_EXIT_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2:       [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
+; UNROLL2-NEXT:    [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2_1]], label %[[LATCH_EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EARLY_EXIT_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[L_LCSSA_PH:%.*]] = phi ptr [ [[L]], %[[LOOP_HEADER]] ], [ [[L_1]], %[[LOOP_LATCH]] ]
+; UNROLL2-NEXT:    br label %[[EARLY_EXIT]]
+; UNROLL2:       [[EARLY_EXIT]]:
+; UNROLL2-NEXT:    [[L_LCSSA:%.*]] = phi ptr [ [[L_PROL]], %[[LOOP_HEADER_PROL]] ], [ [[L_LCSSA_PH]], %[[EARLY_EXIT_LOOPEXIT]] ]
+; UNROLL2-NEXT:    ret ptr [[L_LCSSA]]
+; UNROLL2:       [[LATCH_EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT:    br label %[[LATCH_EXIT]]
+; UNROLL2:       [[LATCH_EXIT]]:
+; UNROLL2-NEXT:    ret ptr [[END]]
+;
+; GENERIC-LABEL: define ptr @multi_2_exiting_find_ptr_loop_diff_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT:  [[ENTRY:.*]]:
+; GENERIC-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; GENERIC-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; GENERIC-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; GENERIC-NEXT:    br label %[[LOOP_HEADER:.*]]
+; GENERIC:       [[LOOP_HEADER]]:
+; GENERIC-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; GENERIC-NEXT:    br i1 [[C_1]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC:       [[LOOP_LATCH]]:
+; GENERIC-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; GENERIC-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT:    br i1 [[C_2]], label %[[LATCH_EXIT:.*]], label %[[LOOP_HEADER]]
+; GENERIC:       [[EARLY_EXIT]]:
+; GENERIC-NEXT:    [[L_LCSSA:%.*]] = phi ptr [ [[L]], %[[LOOP_HEADER]] ]
+; GENERIC-NEXT:    ret ptr [[L_LCSSA]]
+; GENERIC:       [[LATCH_EXIT]]:
+; GENERIC-NEXT:    ret ptr [[END]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+  %end = load ptr, ptr %gep.end, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load ptr, ptr %ptr.iv, align 8
+  %c.1 = icmp eq ptr %l, %tgt
+  br i1 %c.1, label %early.exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %latch.exit, label %loop.header
+
+early.exit:
+  ret ptr %l
+
+latch.exit:
+  ret ptr %end
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_too_large(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; COMMON-NEXT:  [[ENTRY:.*]]:
+; COMMON-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; COMMON-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT:    br label %[[LOOP_HEADER:.*]]
+; COMMON:       [[LOOP_HEADER]]:
+; COMMON-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT:    [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; COMMON-NEXT:    [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; COMMON-NEXT:    [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; COMMON-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON:       [[LOOP_LATCH]]:
+; COMMON-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %udiv = udiv i8 %l, %tgt
+  %udiv.2 = udiv i8 %udiv, 10
+  %c.1 = icmp eq i8 %udiv.2, 2
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+
+define i1 @multi_3_exiting_find_ptr_loop(ptr %vec, ptr %tgt, ptr %tgt2) {
+; COMMON-LABEL: define i1 @multi_3_exiting_find_ptr_loop(
+; COMMON-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT:  [[ENTRY:.*]]:
+; COMMON-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; COMMON-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; COMMON-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; COMMON-NEXT:    br label %[[LOOP_HEADER:.*]]
+; COMMON:       [[LOOP_HEADER]]:
+; COMMON-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; COMMON-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; COMMON-NEXT:    [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; COMMON-NEXT:    [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; COMMON-NEXT:    br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON:       [[LOOP_LATCH]]:
+; COMMON-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; COMMON-NEXT:    [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT:    br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; COMMON-NEXT:    [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT:    ret i1 [[C_4]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+  %end = load ptr, ptr %gep.end, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load ptr, ptr %ptr.iv, align 8
+  %c.1 = icmp eq ptr %l, %tgt
+  br i1 %c.1, label %exit, label %then
+
+then:
+  %c.2 = icmp eq ptr %l, %tgt2
+  br i1 %c.2, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+  %c.3 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.3, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %ptr.iv, %then], [ %end, %loop.latch ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  %c.4 = icmp eq ptr %res, %end
+  ret i1 %c.4
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_switch(ptr %vec, i8 %tgt) {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_switch(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT:  [[ENTRY:.*]]:
+; COMMON-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; COMMON-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT:    br label %[[LOOP_HEADER:.*]]
+; COMMON:       [[LOOP_HEADER]]:
+; COMMON-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT:    switch i8 [[L]], label %[[LOOP_LATCH]] [
+; COMMON-NEXT:      i8 0, label %[[EXIT_1:.*]]
+; COMMON-NEXT:      i8 1, label %[[EXIT_2:.*]]
+; COMMON-NEXT:      i8 2, label %[[EXIT:.*]]
+; COMMON-NEXT:    ]
+; COMMON:       [[LOOP_LATCH]]:
+; COMMON-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT:    ret i1 [[C_3]]
+; COMMON:       [[EXIT_1]]:
+; COMMON-NEXT:    ret i1 false
+; COMMON:       [[EXIT_2]]:
+; COMMON-NEXT:    ret i1 true
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  switch i8 %l, label %loop.latch [
+  i8 0, label %exit.1
+  i8 1, label %exit.2
+  i8 2, label %exit ]
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+
+exit.1:
+  ret i1 0
+
+exit.2:
+  ret i1 1
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_small_max_tc(ptr %vec, i8 %tgt, i5 %n5) {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_small_max_tc(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]], i5 [[N5:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT:  [[ENTRY:.*:]]
+; COMMON-NEXT:    [[C_0:%.*]] = icmp sgt i5 [[N5]], 0
+; COMMON-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT:    br i1 [[C_0]], label %[[LOOP_PH:.*]], label %[[EXIT:.*]]
+; COMMON:       [[LOOP_PH]]:
+; COMMON-NEXT:    [[N64:%.*]] = zext i5 [[N5]] to i64
+; COMMON-NEXT:    [[END:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 [[N64]]
+; COMMON-NEXT:    br label %[[LOOP_HEADER:.*]]
+; COMMON:       [[LOOP_HEADER]]:
+; COMMON-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[LOOP_PH]] ]
+; COMMON-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[LOOP_PH]] ]
+; COMMON-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; COMMON-NEXT:    br i1 [[C_1]], label %[[LOOP_EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON:       [[LOOP_LATCH]]:
+; COMMON-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; COMMON-NEXT:    [[C_2:%.*]] = icmp eq i64 [[IV_NEXT]], [[N64]]
+; COMMON-NEXT:    br i1 [[C_2]], label %[[LOOP_EXIT]], label %[[LOOP_HEADER]]
+; COMMON:       [[LOOP_EXIT]]:
+; COMMON-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT:    ret i1 [[C_3]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    ret i1 false
+;
+entry:
+  %c.0 = icmp sgt i5 %n5, 0
+  %start = load ptr, ptr %vec, align 8
+  br i1 %c.0, label %loop.ph, label %exit
+
+loop.ph:
+  %n64 = zext i5 %n5 to i64
+  %end = getelementptr inbounds nuw i8, ptr %start, i64 %n64
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %loop.ph ]
+  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %loop.ph ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %c.1 = icmp eq i8 %l, %tgt
+  br i1 %c.1, label %loop.exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %c.2 = icmp eq i64 %iv.next, %n64
+  br i1 %c.2, label %loop.exit, label %loop.header
+
+loop.exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+
+exit:
+  ret i1 false
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_invalid_insn(ptr %vec, i8 %tgt) #0 {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_invalid_insn(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR1:[0-9]+]] {
+; COMMON-NEXT:  [[ENTRY:.*]]:
+; COMMON-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; COMMON-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT:    br label %[[LOOP_HEADER:.*]]
+; COMMON:       [[LOOP_HEADER]]:
+; COMMON-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT:    [[DEINTER:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> poison)
+; COMMON-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; COMMON-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON:       [[LOOP_LATCH]]:
+; COMMON-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON:       [[EXIT]]:
+; COMMON-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %deinter = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> poison)
+  %c.1 = icmp eq i8 %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+
+declare void @llvm.assume(i1 noundef)
+declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
+
+attributes #0 = { "target-features"="-sve,-sve2" }

>From 9cfa550feaaec8095e5734b8a3fbb5e7ea3ff41f Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Thu, 20 Mar 2025 13:31:31 +0000
Subject: [PATCH 2/2] Address review comment

---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  95 ++++-----
 .../AArch64/apple-unrolling-multi-exit.ll     | 194 ++++++++++++++++++
 2 files changed, 242 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d3850217f1358..25f5424f5be94 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4258,20 +4258,29 @@ static bool shouldUnrollLoopWithInstruction(Instruction &I,
   return true;
 }
 
-static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
-                                           AArch64TTIImpl &TTI) {
-  // Small search loops with multiple exits can be highly beneficial to unroll.
-  // We only care about loops with exactly two exiting blocks, although each
-  // block could jump to the same exit block.
-  SmallVector<BasicBlock *> Blocks(L->getBlocks());
-  if (Blocks.size() != 2 || L->getExitingBlock())
-    return false;
+static InstructionCost getSizeOfLoop(Loop *L, AArch64TTIImpl &TTI) {
+  // Estimate the size of the loop.
+  InstructionCost Size = 0;
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      if (!shouldUnrollLoopWithInstruction(I, TTI))
+        return InstructionCost::getInvalid();
 
-  if (any_of(Blocks, [](BasicBlock *BB) {
-        return !isa<BranchInst>(BB->getTerminator());
-      }))
-    return false;
+      SmallVector<const Value *, 4> Operands(I.operand_values());
+      InstructionCost Cost =
+          TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
+      // This can happen with intrinsics that don't currently have a cost model
+      // or for some operations that require SVE.
+      if (!Cost.isValid())
+        return InstructionCost::getInvalid();
+      Size += *Cost.getValue();
+    }
+  }
+  return Size;
+}
 
+static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
+                                      AArch64TTIImpl &TTI) {
   // Only consider loops with unknown trip counts for which we can determine
   // a symbolic expression. Multi-exit loops with small known trip counts will
   // likely be unrolled anyway.
@@ -4285,25 +4294,27 @@ static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
   if (MaxTC > 0 && MaxTC <= 32)
     return false;
 
+  if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
+    return false;
+
   // Estimate the size of the loop.
-  int64_t Size = 0;
-  for (auto *BB : L->getBlocks()) {
-    for (auto &I : *BB) {
-      if (!shouldUnrollLoopWithInstruction(I, TTI))
-        return false;
+  InstructionCost Size = getSizeOfLoop(L, TTI);
+  if (!Size.isValid())
+    return false;
 
-      SmallVector<const Value *, 4> Operands(I.operand_values());
-      InstructionCost Cost =
-          TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
-      // This can happen with intrinsics that don't currently have a cost model
-      // or for some operations that require SVE.
-      if (!Cost.isValid())
-        return false;
-      Size += *Cost.getValue();
-    }
-  }
+  // Small search loops with multiple exits can be highly beneficial to unroll.
+  // We only care about loops with exactly two exiting blocks, although each
+  // block could jump to the same exit block.
+  SmallVector<BasicBlock *> Blocks(L->getBlocks());
+  if (Blocks.size() != 2)
+    return false;
+
+  if (any_of(Blocks, [](BasicBlock *BB) {
+        return !isa<BranchInst>(BB->getTerminator());
+      }))
+    return false;
 
-  return Size < 6;
+  return *Size.getValue() < 6;
 }
 
 /// For Apple CPUs, we want to runtime-unroll loops to make better use if the
@@ -4339,24 +4350,9 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
     }
   }
 
-  // Small search loops with multiple exits can be highly beneficial to unroll.
-  if (!L->getExitBlock()) {
-    if (L->getNumBlocks() == 2 && Size < 6 &&
-        all_of(
-            L->getBlocks(),
-            [](BasicBlock *BB) {
-              return isa<BranchInst>(BB->getTerminator());
-            })) {
-      UP.RuntimeUnrollMultiExit = true;
-      UP.Runtime = true;
-      // Limit unroll count.
-      UP.DefaultUnrollRuntimeCount = 4;
-      // Allow slightly more costly trip-count expansion to catch search loops
-      // with pointer inductions.
-      UP.SCEVExpansionBudget = 5;
-    }
+  // Loops without a single exit block are handled in getUnrollingPreferences.
+  if (!L->getExitBlock())
     return;
-  }
 
   if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
     return;
@@ -4466,12 +4462,15 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   UP.PartialOptSizeThreshold = 0;
 
   // Apply subtarget-specific unrolling preferences.
+  unsigned SmallMultiExitLoopUnrollFactor = SmallMultiExitLoopUF;
   switch (ST->getProcFamily()) {
   case AArch64Subtarget::AppleA14:
   case AArch64Subtarget::AppleA15:
   case AArch64Subtarget::AppleA16:
   case AArch64Subtarget::AppleM4:
     getAppleRuntimeUnrollPreferences(L, SE, UP, *this);
+    if (!SmallMultiExitLoopUF.getNumOccurrences())
+      SmallMultiExitLoopUnrollFactor = 4;
     break;
   case AArch64Subtarget::Falkor:
     if (EnableFalkorHWPFUnrollFix)
@@ -4481,14 +4480,16 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     break;
   }
 
-  if (SmallMultiExitLoopUF && shouldUnrollSmallMultiExitLoop(L, SE, *this)) {
+  if (!L->getExitBlock() && SmallMultiExitLoopUnrollFactor &&
+      shouldUnrollMultiExitLoop(L, SE, *this)) {
     UP.RuntimeUnrollMultiExit = true;
     UP.Runtime = true;
     // Limit unroll count.
-    UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUF;
+    UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUnrollFactor;
     // Allow slightly more costly trip-count expansion to catch search loops
     // with pointer inductions.
     UP.SCEVExpansionBudget = 5;
+    return;
   }
 
   // Scan the loop: don't unroll loops with calls as this could prevent
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
index 31b23eae0f866..5177a35f8d36b 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
@@ -3,6 +3,7 @@
 ; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
 ; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
 ; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m4 -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefix=UNROLL2 %s
 ; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
 
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
@@ -86,6 +87,61 @@ define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
 ; APPLE-NEXT:    [[C_5:%.*]] = icmp eq ptr [[RES1]], [[END]]
 ; APPLE-NEXT:    ret i1 [[C_5]]
 ;
+; UNROLL2-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT:    [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT:    [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT:    [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2:       [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT:    br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2:       [[ENTRY_NEW]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT:    [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT:    [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2:       [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT:    [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2:       [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT:    br label %[[EXIT]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_3]]
+;
 ; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
 ; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
 ; OTHER-NEXT:  [[ENTRY:.*]]:
@@ -215,6 +271,67 @@ define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
 ; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
 ; APPLE-NEXT:    ret i1 [[C_3]]
 ;
+; UNROLL2-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
+; UNROLL2-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; UNROLL2-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; UNROLL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; UNROLL2-NEXT:    [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; UNROLL2-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; UNROLL2-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
+; UNROLL2-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2:       [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT:    [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
+; UNROLL2-NEXT:    [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2:       [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
+; UNROLL2-NEXT:    br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2:       [[ENTRY_NEW]]:
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT:    [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT:    [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; UNROLL2-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2:       [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
+; UNROLL2-NEXT:    [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2:       [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT:    br label %[[EXIT]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_3]]
+;
 ; OTHER-LABEL: define i1 @multi_2_exit_find_ptr_loop(
 ; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
 ; OTHER-NEXT:  [[ENTRY:.*]]:
@@ -289,6 +406,29 @@ define i1 @multi_2_exit_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
 ; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
 ; APPLE-NEXT:    ret i1 [[C_3]]
 ;
+; UNROLL2-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; UNROLL2-NEXT:    [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; UNROLL2-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_3]]
+;
 ; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
 ; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
 ; OTHER-NEXT:  [[ENTRY:.*]]:
@@ -363,6 +503,32 @@ define i1 @multi_3_exit_find_ptr_loop(ptr %vec, ptr %tgt, ptr %tgt2) {
 ; APPLE-NEXT:    [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
 ; APPLE-NEXT:    ret i1 [[C_4]]
 ;
+; UNROLL2-LABEL: define i1 @multi_3_exit_find_ptr_loop(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT:    [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; UNROLL2-NEXT:    [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; UNROLL2-NEXT:    br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; UNROLL2-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT:    [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_4]]
+;
 ; OTHER-LABEL: define i1 @multi_3_exit_find_ptr_loop(
 ; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
 ; OTHER-NEXT:  [[ENTRY:.*]]:
@@ -448,6 +614,34 @@ define i1 @multi_3_exit_find_i8_loop_switch(ptr %vec, i8 %tgt) {
 ; APPLE:       [[EXIT_2]]:
 ; APPLE-NEXT:    ret i1 true
 ;
+; UNROLL2-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT:  [[ENTRY:.*]]:
+; UNROLL2-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT:    br label %[[LOOP_HEADER:.*]]
+; UNROLL2:       [[LOOP_HEADER]]:
+; UNROLL2-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; UNROLL2-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT:    switch i8 [[L]], label %[[LOOP_LATCH]] [
+; UNROLL2-NEXT:      i8 0, label %[[EXIT_1:.*]]
+; UNROLL2-NEXT:      i8 1, label %[[EXIT_2:.*]]
+; UNROLL2-NEXT:      i8 2, label %[[EXIT:.*]]
+; UNROLL2-NEXT:    ]
+; UNROLL2:       [[LOOP_LATCH]]:
+; UNROLL2-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; UNROLL2-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; UNROLL2:       [[EXIT]]:
+; UNROLL2-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; UNROLL2-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT:    ret i1 [[C_3]]
+; UNROLL2:       [[EXIT_1]]:
+; UNROLL2-NEXT:    ret i1 false
+; UNROLL2:       [[EXIT_2]]:
+; UNROLL2-NEXT:    ret i1 true
+;
 ; OTHER-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
 ; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
 ; OTHER-NEXT:  [[ENTRY:.*]]:



More information about the llvm-commits mailing list