[llvm] [AArch64] Add flag to control unrolling for small multi-exit loops (PR #131998)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 03:28:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-aarch64
Author: David Sherwood (david-arm)
<details>
<summary>Changes</summary>
It can be highly beneficial to unroll small, two-block search loops
that look for a value in an array. An example of this would be
code that uses libc++'s std::find to search for a value. Older
versions of std::find in the libstdc++ headers are manually unrolled
in the source code, but this might change in newer releases where
the compiler is expected to either vectorise or unroll the loop itself.
This patch adds a new flag -small-multi-exit-loop-unroll-factor
that sets the unroll factor for such loops. The flag defaults to 0,
which leaves this unrolling disabled, but in a future patch I plan
to enable it for some targets and to provide details of any
performance improvements.
---
Patch is 40.52 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131998.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+80-10)
- (added) llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll (+713)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7cec8a17dfaaa..d3850217f1358 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -68,6 +68,11 @@ static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SmallMultiExitLoopUF(
+ "small-multi-exit-loop-unroll-factor", cl::init(0), cl::Hidden,
+ cl::desc(
+ "Force unrolling of small multi-exit loops with given unroll factor"));
+
// A complete guess as to a reasonable cost.
static cl::opt<unsigned>
BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
@@ -4237,6 +4242,70 @@ getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
}
+static bool shouldUnrollLoopWithInstruction(Instruction &I,
+ AArch64TTIImpl &TTI) {
+ // Don't unroll vectorised loop.
+ if (I.getType()->isVectorTy())
+ return false;
+
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ if (const Function *F = cast<CallBase>(I).getCalledFunction())
+ if (!TTI.isLoweredToCall(F))
+ return true;
+ return false;
+ }
+
+ return true;
+}
+
+static bool shouldUnrollSmallMultiExitLoop(Loop *L, ScalarEvolution &SE,
+ AArch64TTIImpl &TTI) {
+ // Small search loops with multiple exits can be highly beneficial to unroll.
+ // We only care about loops with exactly two exiting blocks, although each
+ // block could jump to the same exit block.
+ SmallVector<BasicBlock *> Blocks(L->getBlocks());
+ if (Blocks.size() != 2 || L->getExitingBlock())
+ return false;
+
+ if (any_of(Blocks, [](BasicBlock *BB) {
+ return !isa<BranchInst>(BB->getTerminator());
+ }))
+ return false;
+
+ // Only consider loops with unknown trip counts for which we can determine
+ // a symbolic expression. Multi-exit loops with small known trip counts will
+ // likely be unrolled anyway.
+ const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+ if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
+ return false;
+
+ // It might not be worth unrolling loops with low max trip counts. Restrict
+ // this to max trip counts > 32 for now.
+ unsigned MaxTC = SE.getSmallConstantMaxTripCount(L);
+ if (MaxTC > 0 && MaxTC <= 32)
+ return false;
+
+ // Estimate the size of the loop.
+ int64_t Size = 0;
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ if (!shouldUnrollLoopWithInstruction(I, TTI))
+ return false;
+
+ SmallVector<const Value *, 4> Operands(I.operand_values());
+ InstructionCost Cost =
+ TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
+ // This can happen with intrinsics that don't currently have a cost model
+ // or for some operations that require SVE.
+ if (!Cost.isValid())
+ return false;
+ Size += *Cost.getValue();
+ }
+ }
+
+ return Size < 6;
+}
+
/// For Apple CPUs, we want to runtime-unroll loops to make better use if the
/// OOO engine's wide instruction window and various predictors.
static void
@@ -4412,22 +4481,23 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
break;
}
+ if (SmallMultiExitLoopUF && shouldUnrollSmallMultiExitLoop(L, SE, *this)) {
+ UP.RuntimeUnrollMultiExit = true;
+ UP.Runtime = true;
+ // Limit unroll count.
+ UP.DefaultUnrollRuntimeCount = SmallMultiExitLoopUF;
+ // Allow slightly more costly trip-count expansion to catch search loops
+ // with pointer inductions.
+ UP.SCEVExpansionBudget = 5;
+ }
+
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining. Don't unroll vector loops either, as they don't benefit much from
// unrolling.
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
- // Don't unroll vectorised loop.
- if (I.getType()->isVectorTy())
+ if (!shouldUnrollLoopWithInstruction(I, *this))
return;
-
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
- if (!isLoweredToCall(F))
- continue;
- }
- return;
- }
}
}
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
new file mode 100644
index 0000000000000..b799b4328400a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/unrolling-multi-exit.ll
@@ -0,0 +1,713 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-unroll -mcpu=generic -small-multi-exit-loop-unroll-factor=2 -S %s | FileCheck --check-prefixes=COMMON,UNROLL2 %s
+; RUN: opt -p loop-unroll -mcpu=generic -S %s | FileCheck --check-prefixes=COMMON,GENERIC %s
+
+target triple = "aarch64-linux-gnu"
+
+define i1 @multi_2_exiting_find_i8_loop_same_exit(ptr %vec, i8 %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_i8_loop_same_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT: br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2: [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT: br label %[[EXIT]]
+; UNROLL2: [[EXIT]]:
+; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT: ret i1 [[C_3]]
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_i8_loop_same_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GENERIC-NEXT: [[ENTRY:.*]]:
+; GENERIC-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; GENERIC-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT: br label %[[LOOP_HEADER:.*]]
+; GENERIC: [[LOOP_HEADER]]:
+; GENERIC-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; GENERIC-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC: [[LOOP_LATCH]]:
+; GENERIC-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; GENERIC-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; GENERIC: [[EXIT]]:
+; GENERIC-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; GENERIC-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; GENERIC-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+
+define i1 @multi_2_exiting_find_i8_loop_diff_exit(ptr %vec, i8 %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_i8_loop_diff_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; UNROLL2-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load i8, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq i8 [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 1
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 1
+; UNROLL2-NEXT: br i1 [[TMP3]], label %[[LATCH_EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EARLY_EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; UNROLL2-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EARLY_EXIT_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 1
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[LATCH_EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EARLY_EXIT_LOOPEXIT]]:
+; UNROLL2-NEXT: br label %[[EARLY_EXIT]]
+; UNROLL2: [[EARLY_EXIT]]:
+; UNROLL2-NEXT: ret i1 true
+; UNROLL2: [[LATCH_EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: br label %[[LATCH_EXIT]]
+; UNROLL2: [[LATCH_EXIT]]:
+; UNROLL2-NEXT: ret i1 false
+;
+; GENERIC-LABEL: define i1 @multi_2_exiting_find_i8_loop_diff_exit(
+; GENERIC-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; GENERIC-NEXT: [[ENTRY:.*]]:
+; GENERIC-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; GENERIC-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; GENERIC-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; GENERIC-NEXT: br label %[[LOOP_HEADER:.*]]
+; GENERIC: [[LOOP_HEADER]]:
+; GENERIC-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; GENERIC-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; GENERIC-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; GENERIC-NEXT: br i1 [[C_1]], label %[[EARLY_EXIT:.*]], label %[[LOOP_LATCH]]
+; GENERIC: [[LOOP_LATCH]]:
+; GENERIC-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; GENERIC-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; GENERIC-NEXT: br i1 [[C_2]], label %[[LATCH_EXIT:.*]], label %[[LOOP_HEADER]]
+; GENERIC: [[EARLY_EXIT]]:
+; GENERIC-NEXT: ret i1 true
+; GENERIC: [[LATCH_EXIT]]:
+; GENERIC-NEXT: ret i1 false
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %early.exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %latch.exit, label %loop.header
+
+early.exit:
+ ret i1 1
+
+latch.exit:
+ ret i1 0
+}
+
+
+define i1 @multi_2_exiting_find_ptr_loop_same_exit(ptr %vec, ptr %tgt) {
+; UNROLL2-LABEL: define i1 @multi_2_exiting_find_ptr_loop_same_exit(
+; UNROLL2-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; UNROLL2-NEXT: [[ENTRY:.*]]:
+; UNROLL2-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; UNROLL2-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; UNROLL2-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; UNROLL2-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; UNROLL2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
+; UNROLL2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; UNROLL2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; UNROLL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; UNROLL2-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; UNROLL2-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; UNROLL2-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 1
+; UNROLL2-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; UNROLL2-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL_PREHEADER]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; UNROLL2: [[LOOP_HEADER_PROL]]:
+; UNROLL2-NEXT: [[L_PROL:%.*]] = load ptr, ptr [[START]], align 8
+; UNROLL2-NEXT: [[C_1_PROL:%.*]] = icmp eq ptr [[L_PROL]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_PROL]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_LATCH_PROL:.*]]
+; UNROLL2: [[LOOP_LATCH_PROL]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_PROL:%.*]] = getelementptr inbounds nuw i8, ptr [[START]], i64 8
+; UNROLL2-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; UNROLL2: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_PROL]], %[[LOOP_LATCH_PROL]] ]
+; UNROLL2-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 1
+; UNROLL2-NEXT: br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; UNROLL2: [[ENTRY_NEW]]:
+; UNROLL2-NEXT: br label %[[LOOP_HEADER:.*]]
+; UNROLL2: [[LOOP_HEADER]]:
+; UNROLL2-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_1:%.*]], %[[LOOP_LATCH_1:.*]] ]
+; UNROLL2-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; UNROLL2-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; UNROLL2: [[LOOP_LATCH]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; UNROLL2-NEXT: [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT]], align 8
+; UNROLL2-NEXT: [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; UNROLL2-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1]]
+; UNROLL2: [[LOOP_LATCH_1]]:
+; UNROLL2-NEXT: [[PTR_IV_NEXT_1]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT]], i64 8
+; UNROLL2-NEXT: [[C_2_1:%.*]] = icmp eq ptr [[PTR_IV_NEXT_1]], [[END]]
+; UNROLL2-NEXT: br i1 [[C_2_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; UNROLL2: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; UNROLL2-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT]], %[[LOOP_LATCH]] ], [ [[END]], %[[LOOP_LATCH_1]] ]
+; UNROLL2-NEXT: br label %[[EXIT_UNR_LCSSA]]
+; UNROLL2: [[EXIT_UNR_LCSSA]]:
+; UNROLL2-NEXT: [[RES_PH:%.*]] = phi ptr [ [[START]], %[[LOOP_HEADER_PROL]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
+; UNROLL2-NEXT: br label %[[EXIT]]
+; UNROLL2: [[EXIT]]:
+; UNROLL2-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; UNROLL2-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; UNROLL2-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; UNROLL2-NEXT: ret i1 [[C_3]...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131998
More information about the llvm-commits mailing list