[llvm] [AArch64] Add flag to control unrolling for small multi-exit loops (PR #131998)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 03:27:41 PDT 2025
https://github.com/david-arm created https://github.com/llvm/llvm-project/pull/131998
It can be highly beneficial to unroll small, two-block search loops
that look for a value in an array. An example of this would be
code that uses libc++'s std::find to find a value. Older
versions of std::find in the libstdc++ headers are manually unrolled
in the source code, but this might change in newer releases where
the compiler is expected to either vectorise or unroll the loop itself.
This patch adds a new flag -small-multi-exit-loop-unroll-factor
that controls the amount of unrolling for such loops. This is
currently off by default, but in a future patch I plan to enable
it for some targets and provide details of any performance
improvements.
>From 64c9960791297b6137f3dac59ed1d9a33f140d67 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Wed, 19 Mar 2025 10:22:25 +0000
Subject: [PATCH] [AArch64] Add flag to control unrolling for small multi-exit
loops
It can be highly beneficial to unroll small, two-block search loops
that look for a value in an array. An example of this would be
code that uses libc++'s std::find to find a value. Older
versions of std::find in the libstdc++ headers are manually unrolled
in the source code, but this might change in newer releases where
the compiler is expected to either vectorise or unroll the loop itself.
This patch adds a new flag -small-multi-exit-loop-unroll-factor
that controls the amount of unrolling for such loops. This is
currently off by default, but in a future patch I plan to enable
it for some targets and provide details of any performance
improvements.
---
.../AArch64/AArch64TargetTransformInfo.cpp | 90 ++-
.../AArch64/unrolling-multi-exit.ll | 713 ++++++++++++++++++
2 files changed, 793 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7cec8a17dfaaa..d3850217f1358 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -68,6 +68,11 @@ static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SmallMultiExitLoopUF(
+ "small-multi-exit-loop-unroll-factor", cl::init(0), cl::Hidden,
+ cl::desc(
+ "Force unrolling of small multi-exit loops with given unroll factor"));
+
// A complete guess as to a reasonable cost.
static cl::opt<unsigned>
BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
@@ -4237,6 +4242,70 @@ getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
}
+static bool shouldUnrollLoopWithInstruction(Instruction &I,
+ AArch64TTIImpl &TTI) { // Returns false if I rules out unrolling.
+ // A vector-typed instruction marks a vectorised loop; don't unroll those.
+ if (I.getType()->isVectorTy())
+ return false;
+
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction())
+ if (!TTI.isLoweredToCall(F))
+ return true; // Callee is not lowered to a call, so it is acceptable.
+ return false; // No known callee, or the callee is lowered to a call.
+ }
+
+ return true;
+}
+
+static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
+ AArch64TTIImpl &TTI) {
+ // Returns true for small multi-exit search loops, which can be highly
+ // beneficial to unroll. We only care about loops with exactly two blocks,
+ // both exiting, although each block could jump to the same exit block.
+ SmallVector<BasicBlock *> Blocks(L->getBlocks());
+ if (Blocks.size() != 2 || L->getExitingBlock()) // NOTE(review): non-null presumably means a single exiting block - confirm.
+ return false;
+
+ if (any_of(Blocks, [](BasicBlock *BB) {
+ return !isa<BranchInst>(BB->getTerminator());
+ }))
+ return false; // Every block must end in a branch instruction.
+
+ // Only consider loops with unknown trip counts for which we can determine
+ // a symbolic expression. Multi-exit loops with small known trip counts will
+ // likely be unrolled anyway.
+ const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+ if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
+ return false;
+
+ // It might not be worth unrolling loops with low max trip counts. Restrict
+ // this to max trip counts > 32 for now. MaxTC == 0 is let through.
+ unsigned MaxTC = SE.getSmallConstantMaxTripCount(L);
+ if (MaxTC > 0 && MaxTC <= 32) // NOTE(review): 0 presumably means "unknown" - confirm.
+ return false;
+
+ // Estimate the size of the loop by summing per-instruction code-size costs.
+ int64_t Size = 0;
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ if (!shouldUnrollLoopWithInstruction(I, TTI))
+ return false;
+
+ SmallVector<const Value *, 4> Operands(I.operand_values());
+ InstructionCost Cost =
+ TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
+ // This can happen with intrinsics that don't currently have a cost model
+ // or for some operations that require SVE.
+ if (!Cost.isValid())
+ return false;
+ Size += *Cost.getValue(); // Cost was checked to be valid just above.
+ }
+ }
+
+ return Size < 6; // Only loops with a total code-size cost below 6 qualify.
+}
+
/// For Apple CPUs, we want to runtime-unroll loops to make better use if the
/// OOO engine's wide instruction window and various predictors.
static void
@@ -4412,22 +4481,23 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
break;
}
+ if (SmallMultiExitLoopUF && shouldUnrollSmallMultiExitLoop(L, SE, *this)) {
+ UP.RuntimeUnrollMultiExit = true;
+ UP.Runtime = true;
+ // Limit unroll count.
+ UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUF;
+ // Allow slightly more costly trip-count expansion to catch search loops
+ // with pointer inductions.
+ UP.SCEVExpansionBudget = 5;
+ }
+
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining. Don't unroll vector loops either, as they don't benefit much from
// unrolling.
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
- // Don't unroll vectorised loop.
- if (I.getType()->isVectorTy())
+ if (!shouldUnrollLoopWithInstruction(I, *this))
return;
-
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
- if (!isLoweredToCall(F))
- continue;
- }
- return;
- }
}
}
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
new file mode 100644
index 0000000000000..b799b4328400a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
@@ -0,0 +1,713 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-unroll -mcpu=generic -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefixes=COMMON,UNROLL2 %s
+; RUN: opt -p loop-unroll -mcpu=generic -S %s | FileCheck --check-prefixes=COMMON,GENERIC %s
+
+target triple = "aarch64-linux-gnu"
+
+define i1 @multi_2_exiting_find_i8_loop_same_exit(ptr %vec, i8 %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_i8_loop_same_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT: br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2: [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT: br label %[[EXIT]]
+; UNROLL2: [[EXIT]]:
+; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT: ret i1 [[C_3]]
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_i8_loop_same_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GENERIC-NEXT: [[ENTRY:.*]]:
+; GENERIC-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; GENERIC-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT: br label %[[LOOP_HEADER:.*]]
+; GENERIC: [[LOOP_HEADER]]:
+; GENERIC-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; GENERIC-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC: [[LOOP_LATCH]]:
+; GENERIC-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; GENERIC-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; GENERIC: [[EXIT]]:
+; GENERIC-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; GENERIC-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; GENERIC-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_diff_exit(ptr %vec, i8 %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_i8_loop_diff_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT: br i1 [[TMP3]], label %[[LATCH_EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EARLY_EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EARLY_EXIT_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[LATCH_EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EARLY_EXIT_LOOPEXIT]]:
+; UNROLL2-NEXT: br label %[[EARLY_EXIT]]
+; UNROLL2: [[EARLY_EXIT]]:
+; UNROLL2-NEXT: ret i1 true
+; UNROLL2: [[LATCH_EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: br label %[[LATCH_EXIT]]
+; UNROLL2: [[LATCH_EXIT]]:
+; UNROLL2-NEXT: ret i1 false
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_i8_loop_diff_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT: [[ENTRY:.*]]:
+; GENERIC-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; GENERIC-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT: br label %[[LOOP_HEADER:.*]]
+; GENERIC: [[LOOP_HEADER]]:
+; GENERIC-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; GENERIC-NEXT: br i1 [[C_1]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC: [[LOOP_LATCH]]:
+; GENERIC-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; GENERIC-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT: br i1 [[C_2]], label %[[LATCH_EXIT:.*]], label %[[LOOP_HEADER]]
+; GENERIC: [[EARLY_EXIT]]:
+; GENERIC-NEXT: ret i1 true
+; GENERIC: [[LATCH_EXIT]]:
+; GENERIC-NEXT: ret i1 false
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %early.exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %latch.exit, label %loop.header
+
+early.exit:
+ ret i1 1
+
+latch.exit:
+ ret i1 0
+}
+
+
+define i1 @multi_2_exiting_find_ptr_loop_same_exit(ptr %vec, ptr %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_ptr_loop_same_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
+; UNROLL2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; UNROLL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; UNROLL2-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; UNROLL2-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
+; UNROLL2-NEXT: br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT: [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT: br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2: [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT: br label %[[EXIT]]
+; UNROLL2: [[EXIT]]:
+; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT: ret i1 [[C_3]]
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_ptr_loop_same_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT: [[ENTRY:.*]]:
+; GENERIC-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; GENERIC-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; GENERIC-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; GENERIC-NEXT: br label %[[LOOP_HEADER:.*]]
+; GENERIC: [[LOOP_HEADER]]:
+; GENERIC-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; GENERIC-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC: [[LOOP_LATCH]]:
+; GENERIC-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; GENERIC-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; GENERIC: [[EXIT]]:
+; GENERIC-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; GENERIC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; GENERIC-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; GENERIC-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+ %end = load ptr, ptr %gep.end, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load ptr, ptr %ptr.iv, align 8
+ %c.1 = icmp eq ptr %l, %tgt
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+
+define ptr @multi_2_exiting_find_ptr_loop_diff_exit(ptr %vec, ptr %tgt) {
+; UNROLL2-LABEL: define ptr @multi_2_exiting_find_ptr_loop_diff_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START3:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT: [[TMP0:%.*]] = add i64 [[END2]], -8
+; UNROLL2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START3]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; UNROLL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; UNROLL2-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; UNROLL2-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
+; UNROLL2-NEXT: br i1 [[TMP6]], label %[[LATCH_EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EARLY_EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT: [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EARLY_EXIT_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[LATCH_EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EARLY_EXIT_LOOPEXIT]]:
+; UNROLL2-NEXT: [[L_LCSSA_PH:%.*]] = phi ptr [ [[L]], %[[LOOP_HEADER]] ], [ [[L_1]], %[[LOOP_LATCH]] ]
+; UNROLL2-NEXT: br label %[[EARLY_EXIT]]
+; UNROLL2: [[EARLY_EXIT]]:
+; UNROLL2-NEXT: [[L_LCSSA:%.*]] = phi ptr [ [[L_PROL]], %[[LOOP_HEADER_PROL]] ], [ [[L_LCSSA_PH]], %[[EARLY_EXIT_LOOPEXIT]] ]
+; UNROLL2-NEXT: ret ptr [[L_LCSSA]]
+; UNROLL2: [[LATCH_EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: br label %[[LATCH_EXIT]]
+; UNROLL2: [[LATCH_EXIT]]:
+; UNROLL2-NEXT: ret ptr [[END]]
+;
+; GENERIC-LABEL: define ptr @multi_2_exiting_find_ptr_loop_diff_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT: [[ENTRY:.*]]:
+; GENERIC-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; GENERIC-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; GENERIC-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; GENERIC-NEXT: br label %[[LOOP_HEADER:.*]]
+; GENERIC: [[LOOP_HEADER]]:
+; GENERIC-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; GENERIC-NEXT: br i1 [[C_1]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC: [[LOOP_LATCH]]:
+; GENERIC-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; GENERIC-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT: br i1 [[C_2]], label %[[LATCH_EXIT:.*]], label %[[LOOP_HEADER]]
+; GENERIC: [[EARLY_EXIT]]:
+; GENERIC-NEXT: [[L_LCSSA:%.*]] = phi ptr [ [[L]], %[[LOOP_HEADER]] ]
+; GENERIC-NEXT: ret ptr [[L_LCSSA]]
+; GENERIC: [[LATCH_EXIT]]:
+; GENERIC-NEXT: ret ptr [[END]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+ %end = load ptr, ptr %gep.end, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load ptr, ptr %ptr.iv, align 8
+ %c.1 = icmp eq ptr %l, %tgt
+ br i1 %c.1, label %early.exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %latch.exit, label %loop.header
+
+early.exit:
+ ret ptr %l
+
+latch.exit:
+ ret ptr %end
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_too_large(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; COMMON-NEXT: [[ENTRY:.*]]:
+; COMMON-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; COMMON-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT: br label %[[LOOP_HEADER:.*]]
+; COMMON: [[LOOP_HEADER]]:
+; COMMON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT: [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; COMMON-NEXT: [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; COMMON-NEXT: [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; COMMON-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON: [[LOOP_LATCH]]:
+; COMMON-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON: [[EXIT]]:
+; COMMON-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %udiv = udiv i8 %l, %tgt
+ %udiv.2 = udiv i8 %udiv, 10
+ %c.1 = icmp eq i8 %udiv.2, 2
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+
+; Pointer search loop with three exiting blocks (%loop.header, %then and
+; %loop.latch) that all branch to %exit. The two early exits compare the loaded
+; pointer against %tgt and %tgt2; the latch exit is taken when %ptr.iv.next
+; equals %end.
+; The expected output below keeps a single copy of the loop body (no unrolled
+; iterations) and folds the two early-exit compares into one select-based
+; branch condition.
+define i1 @multi_3_exiting_find_ptr_loop(ptr %vec, ptr %tgt, ptr %tgt2) {
+; COMMON-LABEL: define i1 @multi_3_exiting_find_ptr_loop(
+; COMMON-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[ENTRY:.*]]:
+; COMMON-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; COMMON-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; COMMON-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; COMMON-NEXT: br label %[[LOOP_HEADER:.*]]
+; COMMON: [[LOOP_HEADER]]:
+; COMMON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; COMMON-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; COMMON-NEXT: [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; COMMON-NEXT: [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; COMMON-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON: [[LOOP_LATCH]]:
+; COMMON-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; COMMON-NEXT: [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT: br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON: [[EXIT]]:
+; COMMON-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; COMMON-NEXT: [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT: ret i1 [[C_4]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+ %end = load ptr, ptr %gep.end, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load ptr, ptr %ptr.iv, align 8
+ %c.1 = icmp eq ptr %l, %tgt
+ br i1 %c.1, label %exit, label %then
+
+then:
+ %c.2 = icmp eq ptr %l, %tgt2
+ br i1 %c.2, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+ %c.3 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.3, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %ptr.iv, %then], [ %end, %loop.latch ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ %c.4 = icmp eq ptr %res, %end
+ ret i1 %c.4
+}
+
+
+; Search loop whose header terminates in a switch on the loaded i8: values 0
+; and 1 leave the loop through the dedicated return blocks %exit.1 and %exit.2,
+; value 2 goes to the shared %exit block, and every other value continues to
+; %loop.latch.
+; The expected output below contains a single, un-unrolled copy of the loop.
+; NOTE(review): %tgt is never used in this function, and %end is loaded from
+; %vec + 1 with align 8 (@multi_3_exiting_find_ptr_loop uses offset 8) -
+; confirm both are intentional.
+define i1 @multi_2_exiting_find_i8_loop_switch(ptr %vec, i8 %tgt) {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_switch(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[ENTRY:.*]]:
+; COMMON-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; COMMON-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT: br label %[[LOOP_HEADER:.*]]
+; COMMON: [[LOOP_HEADER]]:
+; COMMON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT: switch i8 [[L]], label %[[LOOP_LATCH]] [
+; COMMON-NEXT: i8 0, label %[[EXIT_1:.*]]
+; COMMON-NEXT: i8 1, label %[[EXIT_2:.*]]
+; COMMON-NEXT: i8 2, label %[[EXIT:.*]]
+; COMMON-NEXT: ]
+; COMMON: [[LOOP_LATCH]]:
+; COMMON-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON: [[EXIT]]:
+; COMMON-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT: ret i1 [[C_3]]
+; COMMON: [[EXIT_1]]:
+; COMMON-NEXT: ret i1 false
+; COMMON: [[EXIT_2]]:
+; COMMON-NEXT: ret i1 true
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ switch i8 %l, label %loop.latch [
+ i8 0, label %exit.1
+ i8 1, label %exit.2
+ i8 2, label %exit ]
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+
+exit.1:
+ ret i1 0
+
+exit.2:
+ ret i1 1
+}
+
+
+; Two-exit i8 search loop with a bounded latch trip count: the loop is only
+; entered when the i5 value %n5 is signed-greater-than zero, and the latch
+; exits once %iv.next equals zext(%n5), so the latch exit is reached after at
+; most 15 iterations. The early exit in %loop.header fires when the loaded
+; byte equals %tgt.
+; The expected output below contains a single, un-unrolled copy of the loop.
+define i1 @multi_2_exiting_find_i8_loop_small_max_tc(ptr %vec, i8 %tgt, i5 %n5) {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_small_max_tc(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]], i5 [[N5:%.*]]) #[[ATTR0]] {
+; COMMON-NEXT: [[ENTRY:.*:]]
+; COMMON-NEXT: [[C_0:%.*]] = icmp sgt i5 [[N5]], 0
+; COMMON-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT: br i1 [[C_0]], label %[[LOOP_PH:.*]], label %[[EXIT:.*]]
+; COMMON: [[LOOP_PH]]:
+; COMMON-NEXT: [[N64:%.*]] = zext i5 [[N5]] to i64
+; COMMON-NEXT: [[END:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 [[N64]]
+; COMMON-NEXT: br label %[[LOOP_HEADER:.*]]
+; COMMON: [[LOOP_HEADER]]:
+; COMMON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[LOOP_PH]] ]
+; COMMON-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[LOOP_PH]] ]
+; COMMON-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; COMMON-NEXT: br i1 [[C_1]], label %[[LOOP_EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON: [[LOOP_LATCH]]:
+; COMMON-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; COMMON-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV_NEXT]], [[N64]]
+; COMMON-NEXT: br i1 [[C_2]], label %[[LOOP_EXIT]], label %[[LOOP_HEADER]]
+; COMMON: [[LOOP_EXIT]]:
+; COMMON-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT: ret i1 [[C_3]]
+; COMMON: [[EXIT]]:
+; COMMON-NEXT: ret i1 false
+;
+entry:
+ %c.0 = icmp sgt i5 %n5, 0
+ %start = load ptr, ptr %vec, align 8
+ br i1 %c.0, label %loop.ph, label %exit
+
+loop.ph:
+ %n64 = zext i5 %n5 to i64
+ %end = getelementptr inbounds nuw i8, ptr %start, i64 %n64
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %loop.ph ]
+ %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %loop.ph ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %loop.exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %c.2 = icmp eq i64 %iv.next, %n64
+ br i1 %c.2, label %loop.exit, label %loop.header
+
+loop.exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+
+exit:
+ ret i1 false
+}
+
+
+; Two-exit i8 search loop whose header also calls
+; @llvm.vector.deinterleave2.nxv32i8 on a scalable vector, in a function
+; carrying attribute group #0 (which disables the sve and sve2 target
+; features).
+; The expected output below contains a single, un-unrolled copy of the loop.
+; NOTE(review): presumably the scalable-vector call cannot be costed without
+; SVE and that is what blocks unrolling - confirm against the cost-model code
+; in the patch.
+define i1 @multi_2_exiting_find_i8_loop_invalid_insn(ptr %vec, i8 %tgt) #0 {
+; COMMON-LABEL: define i1 @multi_2_exiting_find_i8_loop_invalid_insn(
+; COMMON-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR1:[0-9]+]] {
+; COMMON-NEXT: [[ENTRY:.*]]:
+; COMMON-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; COMMON-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; COMMON-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; COMMON-NEXT: br label %[[LOOP_HEADER:.*]]
+; COMMON: [[LOOP_HEADER]]:
+; COMMON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; COMMON-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; COMMON-NEXT: [[DEINTER:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> poison)
+; COMMON-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; COMMON-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; COMMON: [[LOOP_LATCH]]:
+; COMMON-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; COMMON-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; COMMON-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; COMMON: [[EXIT]]:
+; COMMON-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; COMMON-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; COMMON-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %deinter = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> poison)
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+
+; Declarations for the intrinsics called by the functions above.
+declare void @llvm.assume(i1 noundef)
+declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
+
+; Attribute group #0 disables the sve and sve2 target features; it is attached
+; to @multi_2_exiting_find_i8_loop_invalid_insn.
+attributes #0 = { "target-features"="-sve,-sve2" }
More information about the llvm-commits
mailing list