[llvm] [AArch64] Runtime-unroll small multi-exit loops on Apple Silicon. (PR #124751)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 06:16:14 PST 2025
https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/124751
Extend unrolling preferences to allow more aggressive unrolling of
search loops with 2 exits, building on the TTI hook added in
https://github.com/llvm/llvm-project/commit/ad9da92cf6f735747ef04fd56937e1d76819e503.
In combination with https://github.com/llvm/llvm-project/commit/eac23a5b971362cda3c646e018b9f26d0bc1ff3a this enables unrolling loops like
std::find, which can improve performance significantly (+15% end-to-end
on a workload that makes heavy use of std::find). It increases the total
number of unrolled loops by ~2.5% across a very large corpus of
workloads.
>From 0c94f0fbc5e88da4cf0bef129936f07cb2c3e9e0 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 28 Jan 2025 13:50:41 +0000
Subject: [PATCH 1/2] [LoopUnroll] Add AArch64 tests for multi-exit loop
unrolling.
---
.../AArch64/apple-unrolling-multi-exit.ll | 395 ++++++++++++++++++
1 file changed, 395 insertions(+)
create mode 100644 llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
new file mode 100644
index 00000000000000..bfcd6f9e32a3b0
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-unroll -mcpu=apple-m1 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
+; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx15.0.0"
+
+define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; APPLE-NEXT: [[ENTRY:.*]]:
+; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT]]:
+; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT: ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
+; OTHER-NEXT: [[ENTRY:.*]]:
+; OTHER-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
+; OTHER: [[LOOP_HEADER]]:
+; OTHER-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
+; OTHER-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER: [[LOOP_LATCH]]:
+; OTHER-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER: [[EXIT]]:
+; OTHER-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %c.1 = icmp eq i8 %l, %tgt
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+
+define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT: [[ENTRY:.*]]:
+; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; APPLE-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT]]:
+; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT: ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_ptr_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT: [[ENTRY:.*]]:
+; OTHER-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; OTHER-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; OTHER-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
+; OTHER: [[LOOP_HEADER]]:
+; OTHER-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; OTHER-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; OTHER-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER: [[LOOP_LATCH]]:
+; OTHER-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; OTHER-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER: [[EXIT]]:
+; OTHER-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+ %end = load ptr, ptr %gep.end, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load ptr, ptr %ptr.iv, align 8
+ %c.1 = icmp eq ptr %l, %tgt
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+
+define i1 @multi_2_exit_find_i8_loop_too_large(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT: [[ENTRY:.*]]:
+; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT: [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; APPLE-NEXT: [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; APPLE-NEXT: [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT]]:
+; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT: ret i1 [[C_3]]
+;
+; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop_too_large(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT: [[ENTRY:.*]]:
+; OTHER-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
+; OTHER: [[LOOP_HEADER]]:
+; OTHER-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT: [[UDIV:%.*]] = udiv i8 [[L]], [[TGT]]
+; OTHER-NEXT: [[UDIV_2:%.*]] = udiv i8 [[UDIV]], 10
+; OTHER-NEXT: [[C_1:%.*]] = icmp eq i8 [[UDIV_2]], 2
+; OTHER-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER: [[LOOP_LATCH]]:
+; OTHER-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER: [[EXIT]]:
+; OTHER-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT: ret i1 [[C_3]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ %udiv = udiv i8 %l, %tgt
+ %udiv.2 = udiv i8 %udiv, 10
+ %c.1 = icmp eq i8 %udiv.2, 2
+ br i1 %c.1, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+}
+define i1 @multi_3_exit_find_ptr_loop(ptr %vec, ptr %tgt, ptr %tgt2) {
+; APPLE-LABEL: define i1 @multi_3_exit_find_ptr_loop(
+; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT: [[ENTRY:.*]]:
+; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; APPLE-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; APPLE-NEXT: [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; APPLE-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT: br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT]]:
+; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; APPLE-NEXT: [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT: ret i1 [[C_4]]
+;
+; OTHER-LABEL: define i1 @multi_3_exit_find_ptr_loop(
+; OTHER-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]], ptr [[TGT2:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT: [[ENTRY:.*]]:
+; OTHER-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
+; OTHER-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
+; OTHER-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
+; OTHER: [[LOOP_HEADER]]:
+; OTHER-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
+; OTHER-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
+; OTHER-NEXT: [[C_2:%.*]] = icmp eq ptr [[L]], [[TGT2]]
+; OTHER-NEXT: [[OR_COND:%.*]] = select i1 [[C_1]], i1 true, i1 [[C_2]]
+; OTHER-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
+; OTHER: [[LOOP_LATCH]]:
+; OTHER-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
+; OTHER-NEXT: [[C_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT: br i1 [[C_3]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER: [[EXIT]]:
+; OTHER-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
+; OTHER-NEXT: [[C_4:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT: ret i1 [[C_4]]
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 8
+ %end = load ptr, ptr %gep.end, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load ptr, ptr %ptr.iv, align 8
+ %c.1 = icmp eq ptr %l, %tgt
+ br i1 %c.1, label %exit, label %then
+
+then:
+ %c.2 = icmp eq ptr %l, %tgt2
+ br i1 %c.2, label %exit, label %loop.latch
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 8
+ %c.3 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.3, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %ptr.iv, %then], [ %end, %loop.latch ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %end, i64 8) ]
+ %c.4 = icmp eq ptr %res, %end
+ ret i1 %c.4
+}
+
+define i1 @multi_3_exit_find_i8_loop_switch(ptr %vec, i8 %tgt) {
+; APPLE-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
+; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; APPLE-NEXT: [[ENTRY:.*]]:
+; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; APPLE-NEXT: switch i8 [[L]], label %[[LOOP_LATCH]] [
+; APPLE-NEXT: i8 0, label %[[EXIT_1:.*]]
+; APPLE-NEXT: i8 1, label %[[EXIT_2:.*]]
+; APPLE-NEXT: i8 2, label %[[EXIT:.*]]
+; APPLE-NEXT: ]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT]]:
+; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; APPLE-NEXT: ret i1 [[C_3]]
+; APPLE: [[EXIT_1]]:
+; APPLE-NEXT: ret i1 false
+; APPLE: [[EXIT_2]]:
+; APPLE-NEXT: ret i1 true
+;
+; OTHER-LABEL: define i1 @multi_3_exit_find_i8_loop_switch(
+; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0]] {
+; OTHER-NEXT: [[ENTRY:.*]]:
+; OTHER-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; OTHER-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
+; OTHER-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
+; OTHER: [[LOOP_HEADER]]:
+; OTHER-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; OTHER-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
+; OTHER-NEXT: switch i8 [[L]], label %[[LOOP_LATCH]] [
+; OTHER-NEXT: i8 0, label %[[EXIT_1:.*]]
+; OTHER-NEXT: i8 1, label %[[EXIT_2:.*]]
+; OTHER-NEXT: i8 2, label %[[EXIT:.*]]
+; OTHER-NEXT: ]
+; OTHER: [[LOOP_LATCH]]:
+; OTHER-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
+; OTHER-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; OTHER-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; OTHER: [[EXIT]]:
+; OTHER-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; OTHER-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
+; OTHER-NEXT: ret i1 [[C_3]]
+; OTHER: [[EXIT_1]]:
+; OTHER-NEXT: ret i1 false
+; OTHER: [[EXIT_2]]:
+; OTHER-NEXT: ret i1 true
+;
+entry:
+ %start = load ptr, ptr %vec, align 8
+ %gep.end = getelementptr inbounds nuw i8, ptr %vec, i64 1
+ %end = load ptr, ptr %gep.end, align 8
+ br label %loop.header
+
+loop.header:
+ %ptr.iv = phi ptr [ %ptr.iv.next, %loop.latch ], [ %start, %entry ]
+ %l = load i8, ptr %ptr.iv, align 8
+ switch i8 %l, label %loop.latch [
+ i8 0, label %exit.1
+ i8 1, label %exit.2
+ i8 2, label %exit ]
+
+loop.latch:
+ %ptr.iv.next = getelementptr inbounds nuw i8, ptr %ptr.iv, i64 1
+ %c.2 = icmp eq ptr %ptr.iv.next, %end
+ br i1 %c.2, label %exit, label %loop.header
+
+exit:
+ %res = phi ptr [ %ptr.iv, %loop.header ], [ %end, %loop.latch ]
+ %c.3 = icmp eq ptr %res, %end
+ ret i1 %c.3
+
+exit.1:
+ ret i1 0
+
+exit.2:
+ ret i1 1
+}
+
+declare void @llvm.assume(i1 noundef)
>From 47d7b739368ecb37051f2f29c0e5d29340529b22 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 28 Jan 2025 09:49:07 +0000
Subject: [PATCH 2/2] [AArch64] Runtime-unroll small multi-exit loops on Apple
Silicon.
Extend unrolling preferences to allow more aggressive unrolling of
search loops with 2 exits, building on the TTI hook added in
ad9da92cf6f7.
In combination with eac23a5b9 this enables unrolling loops like
std::find, which can improve performance significantly (+15% end-to-end
on a workload that makes heavy use of std::find). It increases the total
number of unrolled loops by ~2.5% across a very large corpus of
workloads.
---
.../AArch64/AArch64TargetTransformInfo.cpp | 35 +++-
.../AArch64/apple-unrolling-multi-exit.ll | 152 ++++++++++++++++--
2 files changed, 164 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aae2fdaf5bec37..18b5a5beb62387 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4102,15 +4102,14 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
TargetTransformInfo::UnrollingPreferences &UP,
AArch64TTIImpl &TTI) {
// Limit loops with structure that is highly likely to benefit from runtime
- // unrolling; that is we exclude outer loops, loops with multiple exits and
- // many blocks (i.e. likely with complex control flow). Note that the
- // heuristics here may be overly conservative and we err on the side of
- // avoiding runtime unrolling rather than unroll excessively. They are all
- // subject to further refinement.
- if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)
+ // unrolling; that is we exclude outer loops and loops with many blocks (i.e.
+ // likely with complex control flow). Note that the heuristics here may be
+ // overly conservative and we err on the side of avoiding runtime unrolling
+ // rather than unroll excessively. They are all subject to further refinement.
+ if (!L->isInnermost() || L->getNumBlocks() > 8)
return;
- const SCEV *BTC = SE.getBackedgeTakenCount(L);
+ const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC) ||
(SE.getSmallConstantMaxTripCount(L) > 0 &&
SE.getSmallConstantMaxTripCount(L) <= 32))
@@ -4129,6 +4128,28 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
}
}
+ // Small search loops with multiple exits can be highly beneficial to unroll.
+ if (!L->getExitBlock()) {
+ if (L->getNumBlocks() == 2 && Size < 6 &&
+ all_of(
+ L->getBlocks(),
+ [](BasicBlock *BB) {
+ return isa<BranchInst>(BB->getTerminator());
+ })) {
+ UP.RuntimeUnrollMultiExit = true;
+ UP.Runtime = true;
+ // Limit unroll count.
+ UP.DefaultUnrollRuntimeCount = 4;
+ // Allow slightly more costly trip-count expansion to catch search loops
+ // with pointer inductions.
+ UP.SCEVExpansionBudget = 5;
+ }
+ return;
+ }
+
+ if (SE.getSymbolicMaxBackedgeTakenCount(L) != SE.getBackedgeTakenCount(L))
+ return;
+
// Limit to loops with trip counts that are cheap to expand.
UP.SCEVExpansionBudget = 1;
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
index bfcd6f9e32a3b0..31b23eae0f8660 100644
--- a/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling-multi-exit.ll
@@ -13,22 +13,78 @@ define i1 @multi_2_exit_find_i8_loop(ptr %vec, i8 %tgt) {
; APPLE-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 1
; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
-; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
-; APPLE: [[LOOP_HEADER]]:
-; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; APPLE-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
+; APPLE-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
+; APPLE-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
+; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 3
+; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; APPLE: [[LOOP_HEADER_PROL_PREHEADER]]:
+; APPLE-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; APPLE: [[LOOP_HEADER_PROL]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
+; APPLE-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
; APPLE-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 8
; APPLE-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], [[TGT]]
-; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
-; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
+; APPLE: [[LOOP_LATCH_PROL]]:
; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 1
; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
-; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; APPLE: [[EXIT]]:
-; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; APPLE-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; APPLE-NEXT: br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
+; APPLE-NEXT: [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT: [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; APPLE-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 3
+; APPLE-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE: [[ENTRY_NEW]]:
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[RES:%.*]], %[[LOOP_LATCH_3:.*]] ]
+; APPLE-NEXT: [[L1:%.*]] = load i8, ptr [[PTR_IV1]], align 8
+; APPLE-NEXT: [[C_4:%.*]] = icmp eq i8 [[L1]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 1
+; APPLE-NEXT: [[L_1:%.*]] = load i8, ptr [[PTR_IV_NEXT1]], align 8
+; APPLE-NEXT: [[C_1_1:%.*]] = icmp eq i8 [[L_1]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
+; APPLE: [[LOOP_LATCH_1]]:
+; APPLE-NEXT: [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 1
+; APPLE-NEXT: [[L_2:%.*]] = load i8, ptr [[PTR_IV_NEXT_1]], align 8
+; APPLE-NEXT: [[C_1_2:%.*]] = icmp eq i8 [[L_2]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
+; APPLE: [[LOOP_LATCH_2]]:
+; APPLE-NEXT: [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 1
+; APPLE-NEXT: [[L_3:%.*]] = load i8, ptr [[PTR_IV_NEXT_2]], align 8
+; APPLE-NEXT: [[C_1_3:%.*]] = icmp eq i8 [[L_3]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
+; APPLE: [[LOOP_LATCH_3]]:
+; APPLE-NEXT: [[RES]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 1
; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
-; APPLE-NEXT: ret i1 [[C_3]]
+; APPLE-NEXT: br i1 [[C_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
+; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA:.*]]
+; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
+; APPLE-NEXT: [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
+; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
+; APPLE: [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT: [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
+; APPLE-NEXT: br label %[[EXIT]]
+; APPLE: [[EXIT]]:
+; APPLE-NEXT: [[RES1:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
+; APPLE-NEXT: [[C_5:%.*]] = icmp eq ptr [[RES1]], [[END]]
+; APPLE-NEXT: ret i1 [[C_5]]
;
; OTHER-LABEL: define i1 @multi_2_exit_find_i8_loop(
; OTHER-SAME: ptr [[VEC:%.*]], i8 [[TGT:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -80,22 +136,81 @@ define i1 @multi_2_exit_find_ptr_loop(ptr %vec, ptr %tgt) {
; APPLE-SAME: ptr [[VEC:%.*]], ptr [[TGT:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: [[START:%.*]] = load ptr, ptr [[VEC]], align 8
+; APPLE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[START]], i64 8) ]
; APPLE-NEXT: [[GEP_END:%.*]] = getelementptr inbounds nuw i8, ptr [[VEC]], i64 8
; APPLE-NEXT: [[END:%.*]] = load ptr, ptr [[GEP_END]], align 8
+; APPLE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
-; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
-; APPLE: [[LOOP_HEADER]]:
-; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[START]], %[[ENTRY]] ]
+; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
+; APPLE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
+; APPLE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
+; APPLE-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; APPLE-NEXT: [[TMP4:%.*]] = freeze i64 [[TMP3]]
+; APPLE-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], -1
+; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP4]], 3
+; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_PROL_PREHEADER:.*]], label %[[LOOP_HEADER_PROL_LOOPEXIT:.*]]
+; APPLE: [[LOOP_HEADER_PROL_PREHEADER]]:
+; APPLE-NEXT: br label %[[LOOP_HEADER_PROL:.*]]
+; APPLE: [[LOOP_HEADER_PROL]]:
+; APPLE-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH_PROL:.*]] ], [ [[START]], %[[LOOP_HEADER_PROL_PREHEADER]] ]
+; APPLE-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_LATCH_PROL]] ]
; APPLE-NEXT: [[L:%.*]] = load ptr, ptr [[PTR_IV]], align 8
; APPLE-NEXT: [[C_1:%.*]] = icmp eq ptr [[L]], [[TGT]]
-; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT:.*]], label %[[LOOP_LATCH]]
-; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: br i1 [[C_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT3:.*]], label %[[LOOP_LATCH_PROL]]
+; APPLE: [[LOOP_LATCH_PROL]]:
; APPLE-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8
; APPLE-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
-; APPLE-NEXT: br i1 [[C_2]], label %[[EXIT]], label %[[LOOP_HEADER]]
+; APPLE-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; APPLE-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; APPLE-NEXT: br i1 [[PROL_ITER_CMP]], label %[[LOOP_HEADER_PROL]], label %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]]:
+; APPLE-NEXT: [[RES_UNR_PH:%.*]] = phi ptr [ [[END]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT: [[PTR_IV_UNR_PH:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP_LATCH_PROL]] ]
+; APPLE-NEXT: br label %[[LOOP_HEADER_PROL_LOOPEXIT]]
+; APPLE: [[LOOP_HEADER_PROL_LOOPEXIT]]:
+; APPLE-NEXT: [[RES_UNR:%.*]] = phi ptr [ poison, %[[ENTRY]] ], [ [[RES_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT: [[PTR_IV_UNR:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_UNR_PH]], %[[LOOP_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; APPLE-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], 3
+; APPLE-NEXT: br i1 [[TMP6]], label %[[EXIT:.*]], label %[[ENTRY_NEW:.*]]
+; APPLE: [[ENTRY_NEW]]:
+; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
+; APPLE: [[LOOP_HEADER]]:
+; APPLE-NEXT: [[PTR_IV1:%.*]] = phi ptr [ [[PTR_IV_UNR]], %[[ENTRY_NEW]] ], [ [[PTR_IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ]
+; APPLE-NEXT: [[L1:%.*]] = load ptr, ptr [[PTR_IV1]], align 8
+; APPLE-NEXT: [[C_4:%.*]] = icmp eq ptr [[L1]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_4]], label %[[EXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[LOOP_LATCH:.*]]
+; APPLE: [[LOOP_LATCH]]:
+; APPLE-NEXT: [[PTR_IV_NEXT1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV1]], i64 8
+; APPLE-NEXT: [[L_1:%.*]] = load ptr, ptr [[PTR_IV_NEXT1]], align 8
+; APPLE-NEXT: [[C_1_1:%.*]] = icmp eq ptr [[L_1]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1_1]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_1:.*]]
+; APPLE: [[LOOP_LATCH_1]]:
+; APPLE-NEXT: [[PTR_IV_NEXT_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT1]], i64 8
+; APPLE-NEXT: [[L_2:%.*]] = load ptr, ptr [[PTR_IV_NEXT_1]], align 8
+; APPLE-NEXT: [[C_1_2:%.*]] = icmp eq ptr [[L_2]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1_2]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_2:.*]]
+; APPLE: [[LOOP_LATCH_2]]:
+; APPLE-NEXT: [[PTR_IV_NEXT_2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_1]], i64 8
+; APPLE-NEXT: [[L_3:%.*]] = load ptr, ptr [[PTR_IV_NEXT_2]], align 8
+; APPLE-NEXT: [[C_1_3:%.*]] = icmp eq ptr [[L_3]], [[TGT]]
+; APPLE-NEXT: br i1 [[C_1_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_LATCH_3]]
+; APPLE: [[LOOP_LATCH_3]]:
+; APPLE-NEXT: [[PTR_IV_NEXT_3]] = getelementptr inbounds nuw i8, ptr [[PTR_IV_NEXT_2]], i64 8
+; APPLE-NEXT: [[C_2_3:%.*]] = icmp eq ptr [[PTR_IV_NEXT_3]], [[END]]
+; APPLE-NEXT: br i1 [[C_2_3]], label %[[EXIT_UNR_LCSSA_LOOPEXIT]], label %[[LOOP_HEADER]]
+; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT]]:
+; APPLE-NEXT: [[RES_PH_PH:%.*]] = phi ptr [ [[PTR_IV1]], %[[LOOP_HEADER]] ], [ [[PTR_IV_NEXT1]], %[[LOOP_LATCH]] ], [ [[PTR_IV_NEXT_1]], %[[LOOP_LATCH_1]] ], [ [[PTR_IV_NEXT_2]], %[[LOOP_LATCH_2]] ], [ [[END]], %[[LOOP_LATCH_3]] ]
+; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA:.*]]
+; APPLE: [[EXIT_UNR_LCSSA_LOOPEXIT3]]:
+; APPLE-NEXT: [[RES_PH_PH4:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER_PROL]] ]
+; APPLE-NEXT: br label %[[EXIT_UNR_LCSSA]]
+; APPLE: [[EXIT_UNR_LCSSA]]:
+; APPLE-NEXT: [[RES_PH:%.*]] = phi ptr [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ], [ [[RES_PH_PH4]], %[[EXIT_UNR_LCSSA_LOOPEXIT3]] ]
+; APPLE-NEXT: br label %[[EXIT]]
; APPLE: [[EXIT]]:
-; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP_HEADER]] ], [ [[END]], %[[LOOP_LATCH]] ]
+; APPLE-NEXT: [[RES:%.*]] = phi ptr [ [[RES_UNR]], %[[LOOP_HEADER_PROL_LOOPEXIT]] ], [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ]
; APPLE-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[END]], i64 8) ]
; APPLE-NEXT: [[C_3:%.*]] = icmp eq ptr [[RES]], [[END]]
; APPLE-NEXT: ret i1 [[C_3]]
@@ -393,3 +508,8 @@ exit.2:
}
declare void @llvm.assume(i1 noundef)
+;.
+; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
+; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+;.
More information about the llvm-commits
mailing list