[llvm] [AArch64] Runtime-unroll small multi-exit loops on Apple Silicon. (PR #124751)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 06:16:14 PST 2025


https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/124751

Extend unrolling preferences to allow more aggressive unrolling of
search loops with 2 exits, building on the TTI hook added in
https://github.com/llvm/llvm-project/commit/ad9da92cf6f735747ef04fd56937e1d76819e503.

In combination with https://github.com/llvm/llvm-project/commit/eac23a5b971362cda3c646e018b9f26d0bc1ff3a this enables unrolling loops like
std::find, which can improve performance significantly (+15% end-to-end
on a workload that makes heavy use of std::find). It increases the total
number of unrolled loops by ~2.5% across a very large corpus of
workloads.

>From 0c94f0fbc5e88da4cf0bef129936f07cb2c3e9e0 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 28 Jan 2025 13:50:41 +0000
Subject: [PATCH 1/2] [LoopUnroll] Add AArch64 tests for multi-exit loop
 unrolling.

---
 .../AArch64/apple-unrolling-multi-exit.ll     | 395 ++++++++++++++++++
 1 file changed, 395 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll

diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
new file mode 100644
index 00000000000000..bfcd6f9e32a3b0
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-unroll -mcpu=apple-m1 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx15.0.0"
+
+define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; OTHER-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %c.1 = icmp eq i8 %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+
+define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; OTHER-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+  %end = load ptr, ptr %gep.end, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load ptr, ptr %ptr.iv, align 8
+  %c.1 = icmp eq ptr %l, %tgt
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+
+define i1 @multi_2_exit_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; APPLE-NEXT:    [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; OTHER-NEXT:    [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; OTHER-NEXT:    [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; OTHER-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_3]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  %udiv = udiv i8 %l, %tgt
+  %udiv.2 = udiv i8 %udiv, 10
+  %c.1 = icmp eq i8 %udiv.2, 2
+  br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+}
+define i1 @multi_3_exit_find_ptr_loop(ptr %vec, ptr %tgt, ptr %tgt2) {
+; APPLE-LABEL: define i1 @multi_3_exit_find_ptr_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; APPLE-NEXT:    [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; APPLE-NEXT:    br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT:    [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_4]]
+;
+; OTHER-LABEL: define i1 @multi_3_exit_find_ptr_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; OTHER-NEXT:    [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; OTHER-NEXT:    br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT:    [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_4]]
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+  %end = load ptr, ptr %gep.end, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load ptr, ptr %ptr.iv, align 8
+  %c.1 = icmp eq ptr %l, %tgt
+  br i1 %c.1, label %exit, label %then
+
+then:
+  %c.2 = icmp eq ptr %l, %tgt2
+  br i1 %c.2, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+  %c.3 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.3, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %ptr.iv, %then], [ %end, %loop.latch ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+  %c.4 = icmp eq ptr %res, %end
+  ret i1 %c.4
+}
+
+define i1 @multi_3_exit_find_i8_loop_switch(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT:  [[ENTRY:.*]]:
+; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT:    switch i8 [[L]], label %[[LOOP_LATCH]] [
+; APPLE-NEXT:      i8 0, label %[[EXIT_1:.*]]
+; APPLE-NEXT:      i8 1, label %[[EXIT_2:.*]]
+; APPLE-NEXT:      i8 2, label %[[EXIT:.*]]
+; APPLE-NEXT:    ]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_3]]
+; APPLE:       [[EXIT_1]]:
+; APPLE-NEXT:    ret i1 false
+; APPLE:       [[EXIT_2]]:
+; APPLE-NEXT:    ret i1 true
+;
+; OTHER-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT:  [[ENTRY:.*]]:
+; OTHER-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT:    br label %[[LOOP_HEADER:.*]]
+; OTHER:       [[LOOP_HEADER]]:
+; OTHER-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT:    switch i8 [[L]], label %[[LOOP_LATCH]] [
+; OTHER-NEXT:      i8 0, label %[[EXIT_1:.*]]
+; OTHER-NEXT:      i8 1, label %[[EXIT_2:.*]]
+; OTHER-NEXT:      i8 2, label %[[EXIT:.*]]
+; OTHER-NEXT:    ]
+; OTHER:       [[LOOP_LATCH]]:
+; OTHER-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER:       [[EXIT]]:
+; OTHER-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT:    ret i1 [[C_3]]
+; OTHER:       [[EXIT_1]]:
+; OTHER-NEXT:    ret i1 false
+; OTHER:       [[EXIT_2]]:
+; OTHER-NEXT:    ret i1 true
+;
+entry:
+  %start = load ptr, ptr %vec, align 8
+  %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+  %end = load ptr, ptr %gep.end, align 8
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+  %l = load i8, ptr %ptr.iv, align 8
+  switch i8 %l, label %loop.latch [
+  i8 0, label %exit.1
+  i8 1, label %exit.2
+  i8 2, label %exit ]
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+  %c.2 = icmp eq ptr %ptr.iv.next, %end
+  br i1 %c.2, label %exit, label %loop.header
+
+exit:
+  %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+  %c.3 = icmp eq ptr %res, %end
+  ret i1 %c.3
+
+exit.1:
+  ret i1 0
+
+exit.2:
+  ret i1 1
+}
+
+declare void @llvm.assume(i1 noundef)

>From 47d7b739368ecb37051f2f29c0e5d29340529b22 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 28 Jan 2025 09:49:07 +0000
Subject: [PATCH 2/2] [AArch64] Runtime-unroll small multi-exit loops on Apple
 Silicon.

Extend unrolling preferences to allow more aggressive unrolling of
search loops with 2 exits, building on the TTI hook added in
ad9da92cf6f7.

In combination with eac23a5b9 this enables unrolling loops like
std::find, which can improve performance significantly (+15% end-to-end
on a workload that makes heavy use of std::find). It increases the total
number of unrolled loops by ~2.5% across a very large corpus of
workloads.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  35 +++-
 .../AArch64/apple-unrolling-multi-exit.ll     | 152 ++++++++++++++++--
 2 files changed, 164 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aae2fdaf5bec37..18b5a5beb62387 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4102,15 +4102,14 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
                                  TargetTransformInfo::UnrollingPreferences &UP,
                                  AArch64TTIImpl &TTI) {
   // Limit loops with structure that is highly likely to benefit from runtime
-  // unrolling; that is we exclude outer loops, loops with multiple exits and
-  // many blocks (i.e. likely with complex control flow). Note that the
-  // heuristics here may be overly conservative and we err on the side of
-  // avoiding runtime unrolling rather than unroll excessively. They are all
-  // subject to further refinement.
-  if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
+  // unrolling; that is we exclude outer loops and loops with many blocks (i.e.
+  // likely with complex control flow). Note that the heuristics here may be
+  // overly conservative and we err on the side of avoiding runtime unrolling
+  // rather than unroll excessively. They are all subject to further refinement.
+  if (!L->isInnermost() || L->getNumBlocks() > 8)
     return;
 
-  const SCEV *BTC = SE.getBackedgeTakenCount(L);
+  const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
   if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
       (SE.getSmallConstantMaxTripCount(L) > 0 &&
        SE.getSmallConstantMaxTripCount(L) <= 32))
@@ -4129,6 +4128,28 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
     }
   }
 
+  // Small search loops with multiple exits can be highly beneficial to unroll.
+  if (!L->getExitBlock()) {
+    if (L->getNumBlocks() == 2 && Size < 6 &&
+        all_of(
+            L->getBlocks(),
+            [](BasicBlock *BB) {
+              return isa<BranchInst>(BB->getTerminator());
+            })) {
+      UP.RuntimeUnrollMultiExit = true;
+      UP.Runtime = true;
+      // Limit unroll count.
+      UP.DefaultUnrollRuntimeCount = 4;
+      // Allow slightly more costly trip-count expansion to catch search loops
+      // with pointer inductions.
+      UP.SCEVExpansionBudget = 5;
+    }
+    return;
+  }
+
+  if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
+    return;
+
   // Limit to loops with trip counts that are cheap to expand.
   UP.SCEVExpansionBudget = 1;
 
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
index bfcd6f9e32a3b0..31b23eae0f8660 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
@@ -13,22 +13,78 @@ define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
 ; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
 ; APPLE-NEXT:  [[ENTRY:.*]]:
 ; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
 ; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
 ; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
-; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
-; APPLE:       [[LOOP_HEADER]]:
-; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; APPLE-NEXT:    [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; APPLE-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; APPLE-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP1]], 3
+; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; APPLE:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; APPLE:       [[LOOP_HEADER_PROL]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
+; APPLE-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
 ; APPLE-NEXT:    [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
 ; APPLE-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
-; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
-; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
+; APPLE:       [[LOOP_LATCH_PROL]]:
 ; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
 ; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
-; APPLE-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; APPLE:       [[EXIT]]:
-; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; APPLE-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; APPLE-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 3
+; APPLE-NEXT:    br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE:       [[ENTRY_NEW]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[RES:%.*]], %[[LOOP_LATCH_3:.*]] ]
+; APPLE-NEXT:    [[L1:%.*]] = load i8, ptr [[PTR_IV1]], align 8
+; APPLE-NEXT:    [[C_4:%.*]] = icmp eq i8 [[L1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 1
+; APPLE-NEXT:    [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT1]], align 8
+; APPLE-NEXT:    [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
+; APPLE:       [[LOOP_LATCH_1]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 1
+; APPLE-NEXT:    [[L_2:%.*]] = load i8, ptr [[PTR_IV_NEXT_1]], align 8
+; APPLE-NEXT:    [[C_1_2:%.*]] = icmp eq i8 [[L_2]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
+; APPLE:       [[LOOP_LATCH_2]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 1
+; APPLE-NEXT:    [[L_3:%.*]] = load i8, ptr [[PTR_IV_NEXT_2]], align 8
+; APPLE-NEXT:    [[C_1_3:%.*]] = icmp eq i8 [[L_3]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
+; APPLE:       [[LOOP_LATCH_3]]:
+; APPLE-NEXT:    [[RES]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 1
 ; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
-; APPLE-NEXT:    ret i1 [[C_3]]
+; APPLE-NEXT:    br i1 [[C_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA:.*]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
+; APPLE-NEXT:    [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE:       [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
+; APPLE-NEXT:    br label %[[EXIT]]
+; APPLE:       [[EXIT]]:
+; APPLE-NEXT:    [[RES1:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[C_5:%.*]] = icmp eq ptr [[RES1]], [[END]]
+; APPLE-NEXT:    ret i1 [[C_5]]
 ;
 ; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
 ; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -80,22 +136,81 @@ define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
 ; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
 ; APPLE-NEXT:  [[ENTRY:.*]]:
 ; APPLE-NEXT:    [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
 ; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
 ; APPLE-NEXT:    [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
 ; APPLE-NEXT:    [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
 ; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
-; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
-; APPLE:       [[LOOP_HEADER]]:
-; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT:    [[TMP0:%.*]] = add i64 [[END1]], -8
+; APPLE-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; APPLE-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; APPLE-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; APPLE-NEXT:    [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; APPLE-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; APPLE-NEXT:    [[XTRAITER:%.*]] = and i64 [[TMP4]], 3
+; APPLE-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; APPLE:       [[LOOP_HEADER_PROL_PREHEADER]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL:.*]]
+; APPLE:       [[LOOP_HEADER_PROL]]:
+; APPLE-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
+; APPLE-NEXT:    [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
 ; APPLE-NEXT:    [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
 ; APPLE-NEXT:    [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
-; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
-; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
+; APPLE:       [[LOOP_LATCH_PROL]]:
 ; APPLE-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
 ; APPLE-NEXT:    [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
-; APPLE-NEXT:    br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE-NEXT:    [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; APPLE-NEXT:    [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; APPLE-NEXT:    br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT:    br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; APPLE:       [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 3
+; APPLE-NEXT:    br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE:       [[ENTRY_NEW]]:
+; APPLE-NEXT:    br label %[[LOOP_HEADER:.*]]
+; APPLE:       [[LOOP_HEADER]]:
+; APPLE-NEXT:    [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ]
+; APPLE-NEXT:    [[L1:%.*]] = load ptr, ptr [[PTR_IV1]], align 8
+; APPLE-NEXT:    [[C_4:%.*]] = icmp eq ptr [[L1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; APPLE:       [[LOOP_LATCH]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 8
+; APPLE-NEXT:    [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT1]], align 8
+; APPLE-NEXT:    [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
+; APPLE:       [[LOOP_LATCH_1]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 8
+; APPLE-NEXT:    [[L_2:%.*]] = load ptr, ptr [[PTR_IV_NEXT_1]], align 8
+; APPLE-NEXT:    [[C_1_2:%.*]] = icmp eq ptr [[L_2]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
+; APPLE:       [[LOOP_LATCH_2]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 8
+; APPLE-NEXT:    [[L_3:%.*]] = load ptr, ptr [[PTR_IV_NEXT_2]], align 8
+; APPLE-NEXT:    [[C_1_3:%.*]] = icmp eq ptr [[L_3]], [[TGT]]
+; APPLE-NEXT:    br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
+; APPLE:       [[LOOP_LATCH_3]]:
+; APPLE-NEXT:    [[PTR_IV_NEXT_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 8
+; APPLE-NEXT:    [[C_2_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_3]], [[END]]
+; APPLE-NEXT:    br i1 [[C_2_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT:    [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA:.*]]
+; APPLE:       [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
+; APPLE-NEXT:    [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
+; APPLE-NEXT:    br label %[[EXIT_UNR_LCSSA]]
+; APPLE:       [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT:    [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
+; APPLE-NEXT:    br label %[[EXIT]]
 ; APPLE:       [[EXIT]]:
-; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT:    [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
 ; APPLE-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
 ; APPLE-NEXT:    [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
 ; APPLE-NEXT:    ret i1 [[C_3]]
@@ -393,3 +508,8 @@ exit.2:
 }
 
 declare void @llvm.assume(i1 noundef)
+;.
+; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
+; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+;.



More information about the llvm-commits mailing list