[llvm] [AArch64] Runtime-unroll small multi-exit loops on Apple Silicon. (PR #124751)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 06:16:49 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Extend unrolling preferences to allow more aggressive unrolling of
search loops with 2 exits, building on the TTI hook added in
https://github.com/llvm/llvm-project/commit/ad9da92cf6f735747ef04fd56937e1d76819e503.

In combination with https://github.com/llvm/llvm-project/commit/eac23a5b971362cda3c646e018b9f26d0bc1ff3a this enables unrolling loops like
std::find, which can improve performance significantly (+15% end-to-end
on a workload that makes heavy use of std::find). It increases the total
number of unrolled loops by ~2.5% across a very large corpus of
workloads.

---

Patch is 30.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124751.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+28-7) 
- (added) llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll (+515) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aae2fdaf5bec37..18b5a5beb62387 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4102,15 +4102,14 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
                                  TargetTransformInfo::UnrollingPreferences &UP,
                                  AArch64TTIImpl &TTI) {
   // Limit loops with structure that is highly likely to benefit from runtime
-  // unrolling; that is we exclude outer loops, loops with multiple exits and
-  // many blocks (i.e. likely with complex control flow). Note that the
-  // heuristics here may be overly conservative and we err on the side of
-  // avoiding runtime unrolling rather than unroll excessively. They are all
-  // subject to further refinement.
-  if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
+  // unrolling; that is we exclude outer loops and loops with many blocks (i.e.
+  // likely with complex control flow). Note that the heuristics here may be
+  // overly conservative and we err on the side of avoiding runtime unrolling
+  // rather than unroll excessively. They are all subject to further refinement.
+  if (!L->isInnermost() || L->getNumBlocks() > 8)
     return;
 
-  const SCEV *BTC = SE.getBackedgeTakenCount(L);
+  const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
   if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
       (SE.getSmallConstantMaxTripCount(L) > 0 &&
        SE.getSmallConstantMaxTripCount(L) <= 32))
@@ -4129,6 +4128,28 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
     }
   }
 
+  // Small search loops with multiple exits can be highly beneficial to unroll.
+  if (!L->getExitBlock()) {
+    if (L->getNumBlocks() == 2 && Size < 6 &&
+        all_of(
+            L->getBlocks(),
+            [](BasicBlock *BB) {
+              return isa<BranchInst>(BB->getTerminator());
+            })) {
+      UP.RuntimeUnrollMultiExit = true;
+      UP.Runtime = true;
+      // Limit unroll count.
+      UP.DefaultUnrollRuntimeCount = 4;
+      // Allow slightly more costly trip-count expansion to catch search loops
+      // with pointer inductions.
+      UP.SCEVExpansionBudget = 5;
+    }
+    return;
+  }
+
+  if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
+    return;
+
   // Limit to loops with trip counts that are cheap to expand.
   UP.SCEVExpansionBudget = 1;
 
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
new file mode 100644
index 00000000000000..31b23eae0f8660
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
@@ -0,0 +1,515 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-unroll -mcpu=apple-m1 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx15.0.0"
+
+define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; APPLE-NEXT:    [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; APPLE-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; APPLE-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP1]], 3
+; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; APPLE:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; APPLE:       [[LOOP_HEADER_PROL]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
+; APPLE-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
+; APPLE:       [[LOOP_LATCH_PROL]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; APPLE-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; APPLE-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 3
+; APPLE-NEXT:    br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE:       [[ENTRY_NEW]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[RES:%.*]], %[[LOOP_LATCH_3:.*]] ]
+; APPLE-NEXT:    [[L1:%.*]] = load i8, ptr [[PTR_IV1]], align 8
+; APPLE-NEXT:    [[C_4:%.*]] = icmp eq i8 [[L1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 1
+; APPLE-NEXT:    [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT1]], align 8
+; APPLE-NEXT:    [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
+; APPLE:       [[LOOP_LATCH_1]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 1
+; APPLE-NEXT:    [[L_2:%.*]] = load i8, ptr [[PTR_IV_NEXT_1]], align 8
+; APPLE-NEXT:    [[C_1_2:%.*]] = icmp eq i8 [[L_2]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
+; APPLE:       [[LOOP_LATCH_2]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 1
+; APPLE-NEXT:    [[L_3:%.*]] = load i8, ptr [[PTR_IV_NEXT_2]], align 8
+; APPLE-NEXT:    [[C_1_3:%.*]] = icmp eq i8 [[L_3]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
+; APPLE:       [[LOOP_LATCH_3]]:
+; APPLE-NEXT:    [[RES]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 1
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    br i1 [[C_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA:.*]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
+; APPLE-NEXT:    [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE:       [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
+; APPLE-NEXT:    br label %[[EXIT]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES1:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[C_5:%.*]] = icmp eq ptr [[RES1]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_5]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; OTHER-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %c.1 = icmp eq i8 %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+
+define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
+; APPLE-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; APPLE-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; APPLE-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; APPLE-NEXT:    [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; APPLE-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP4]], 3
+; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; APPLE:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; APPLE:       [[LOOP_HEADER_PROL]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
+; APPLE-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
+; APPLE:       [[LOOP_LATCH_PROL]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; APPLE-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; APPLE-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 3
+; APPLE-NEXT:    br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE:       [[ENTRY_NEW]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ]
+; APPLE-NEXT:    [[L1:%.*]] = load ptr, ptr [[PTR_IV1]], align 8
+; APPLE-NEXT:    [[C_4:%.*]] = icmp eq ptr [[L1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 8
+; APPLE-NEXT:    [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT1]], align 8
+; APPLE-NEXT:    [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
+; APPLE:       [[LOOP_LATCH_1]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 8
+; APPLE-NEXT:    [[L_2:%.*]] = load ptr, ptr [[PTR_IV_NEXT_1]], align 8
+; APPLE-NEXT:    [[C_1_2:%.*]] = icmp eq ptr [[L_2]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
+; APPLE:       [[LOOP_LATCH_2]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 8
+; APPLE-NEXT:    [[L_3:%.*]] = load ptr, ptr [[PTR_IV_NEXT_2]], align 8
+; APPLE-NEXT:    [[C_1_3:%.*]] = icmp eq ptr [[L_3]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
+; APPLE:       [[LOOP_LATCH_3]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 8
+; APPLE-NEXT:    [[C_2_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_3]], [[END]]
+; APPLE-NEXT:    br i1 [[C_2_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA:.*]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
+; APPLE-NEXT:    [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE:       [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
+; APPLE-NEXT:    br label %[[EXIT]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; OTHER-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+  %end = load ptr, ptr %gep.end, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load ptr, ptr %ptr.iv, align 8
+  %c.1 = icmp eq ptr %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+define i1 @multi_2_exit_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; APPLE-NEXT:    [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; APPLE-NEXT:    br i1 [[C_1]], la...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/124751


More information about the llvm-commits mailing list