[llvm] [LoopVectorize] Perform loop versioning for some early exit loops (PR #120603)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 03:03:19 PST 2025
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/120603
>From 7ba35bb7bd2bae6f0d442cc187cb6210d08019ea Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 24 Feb 2025 10:45:38 +0000
Subject: [PATCH 1/4] Add tests
---
.../AArch64/single_early_exit_unsafe_ptrs.ll | 442 ++++++++++++++++++
.../RISCV/single_early_exit_unsafe_ptrs.ll | 398 ++++++++++++++++
.../single_early_exit_unsafe_ptrs.ll | 42 +-
3 files changed, 881 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
new file mode 100644
index 0000000000000..4f7b120643763
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
@@ -0,0 +1,442 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @init_mem(ptr, i64);
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4
+; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [42 x i8]
+ %p2 = alloca [42 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %stride2 = mul i64 %index, 2
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %mul = mul i64 %index, %stride
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) #1 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 4
+ %cmp3 = icmp eq i32 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx2, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i8 @same_exit_block_no_live_outs_faulting_load_after_early_exit(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i8 @same_exit_block_no_live_outs_faulting_load_after_early_exit(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK: [[LOOP_INC]]:
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK: [[LOOP_END]]:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ 1, %[[LOOP]] ], [ [[LD2]], %[[LOOP_INC]] ]
+; CHECK-NEXT: ret i8 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx2, align 1
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i8 [ 1, %loop ], [ %ld2, %loop.inc ]
+ ret i8 %retval
+}
+
+
+attributes #0 = { "target-features"="+sve" vscale_range(1,16) }
+attributes #1 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
new file mode 100644
index 0000000000000..929cc5b84d922
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -passes=loop-vectorize -enable-early-exit-vectorization \
+; RUN:   -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f | FileCheck %s
+
+target triple = "riscv64"
+
+declare void @init_mem(ptr, i64);
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1
+; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1
+; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ %p1 = alloca [42 x i8]
+ %p2 = alloca [42 x i8]
+ call void @init_mem(ptr %p1, i64 1024)
+ call void @init_mem(ptr %p2, i64 1024)
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx1, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %stride2 = mul i64 %index, 2
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %mul = mul i64 %index, %stride
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i32, ptr %arrayidx, align 4
+ %cmp3 = icmp eq i32 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+ %ld2 = load i8, ptr %arrayidx2, align 1
+ %cmp3 = icmp eq i8 %ld1, %ld2
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
+
+
+attributes #0 = { vscale_range(2,1024) }
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
index c68eeac19c9ec..23362004b1b2a 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
declare void @init_mem(ptr, i64);
@@ -141,3 +141,43 @@ loop.end:
%retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
ret i64 %retval
}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK: loop.inc:
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK: loop.end:
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: ret i64 [[RETVAL]]
+;
+entry:
+ br label %loop
+
+loop:
+ %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+ %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+ %ld1 = load i8, ptr %arrayidx, align 1
+ %cmp3 = icmp eq i8 %ld1, 3
+ br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+ %index.next = add i64 %index, 1
+ %exitcond = icmp ne i64 %index.next, 67
+ br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+ %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+ ret i64 %retval
+}
>From d47bfd7120aacac1729fedf9f5a767188f45b70d Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 24 Feb 2025 10:46:01 +0000
Subject: [PATCH 2/4] [LoopVectorize] Perform loop versioning for some early
exit loops
When attempting to vectorise a loop with an uncountable early
exit, we attempt to discover if all the loads in the loop are
known to be dereferenceable. If at least one load could
potentially fault then we abandon vectorisation. This patch
adds support for vectorising loops with one potentially
faulting load by versioning the loop based on the load
pointer alignment. It is required that if the vector load
faults at all, it faults on the first lane, i.e. the load
must not straddle a page boundary. Doing so ensures that the behaviour
of the vector and scalar loops is identical, i.e. if a load
does fault it will fault at the same scalar iteration.
Such vectorisation depends on the following conditions being
met:
1. The max vector width must not exceed the minimum page size.
This is done by adding a getMaxSafeVectorWidthInBits
wrapper that checks if we have an uncountable early exit.
For scalable vectors we must be able to determine the maximum
possible value of vscale.
2. The size of the loaded type must be a power of 2. This is
checked during legalisation.
3. The VF must be a power of two (so that the vector width can
divide wholly into the page size, which is also a power of 2).
For fixed-width vectors this is always true, and for scalable
vectors we query the TTI hook isVScaleKnownToBeAPowerOfTwo.
If the effective runtime VF could change during the loop then
this cannot be vectorised via loop versioning.
4. The load pointer must be aligned to a multiple of the vector
width. (NOTE: interleaving is currently disabled for these early
exit loops.) We add a runtime check to ensure this is true.
---
llvm/include/llvm/Analysis/Loads.h | 6 -
.../Vectorize/LoopVectorizationLegality.h | 33 +-
llvm/lib/Analysis/Loads.cpp | 15 -
.../Vectorize/LoopVectorizationLegality.cpp | 85 ++-
.../Transforms/Vectorize/LoopVectorize.cpp | 70 +-
.../AArch64/single_early_exit_unsafe_ptrs.ll | 722 +++++++++++++++---
.../RISCV/single_early_exit_unsafe_ptrs.ll | 97 ++-
.../single_early_exit_unsafe_ptrs.ll | 2 +-
8 files changed, 871 insertions(+), 159 deletions(-)
diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 639070c07897b..224c936bf161e 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -88,12 +88,6 @@ bool isDereferenceableAndAlignedInLoop(
AssumptionCache *AC = nullptr,
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
-/// Return true if the loop \p L cannot fault on any iteration and only
-/// contains read-only memory accesses.
-bool isDereferenceableReadOnlyLoop(
- Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
-
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index e959d93b57275..b4bf528472c5c 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -382,11 +382,18 @@ class LoopVectorizationLegality {
const LoopAccessInfo *getLAI() const { return LAI; }
bool isSafeForAnyVectorWidth() const {
- return LAI->getDepChecker().isSafeForAnyVectorWidth();
+ return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
+ (!hasUncountableEarlyExit() || !getNumPotentiallyFaultingPointers());
}
uint64_t getMaxSafeVectorWidthInBits() const {
- return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
+ uint64_t MaxSafeVectorWidth =
+ LAI->getDepChecker().getMaxSafeVectorWidthInBits();
+ // The legalizer bails out if getMinPageSize does not return a value.
+ if (hasUncountableEarlyExit() && getNumPotentiallyFaultingPointers())
+ MaxSafeVectorWidth =
+ std::min(MaxSafeVectorWidth, uint64_t(*TTI->getMinPageSize()) * 8);
+ return MaxSafeVectorWidth;
}
/// Returns true if the loop has exactly one uncountable early exit, i.e. an
@@ -419,6 +426,19 @@ class LoopVectorizationLegality {
unsigned getNumStores() const { return LAI->getNumStores(); }
unsigned getNumLoads() const { return LAI->getNumLoads(); }
+ /// Return the number of pointers in the loop that could potentially fault in
+ /// a loop with uncountable early exits.
+ unsigned getNumPotentiallyFaultingPointers() const {
+ return PotentiallyFaultingPtrs.size();
+ }
+
+ /// Return a vector of all potentially faulting pointers in a loop with
+ /// uncountable early exits.
+ const SmallVectorImpl<std::pair<const SCEV *, Type *>> *
+ getPotentiallyFaultingPointers() const {
+ return &PotentiallyFaultingPtrs;
+ }
+
/// Returns a HistogramInfo* for the given instruction if it was determined
/// to be part of a load -> update -> store sequence where multiple lanes
/// may be working on the same memory address.
@@ -524,6 +544,11 @@ class LoopVectorizationLegality {
/// additional cases safely.
bool isVectorizableEarlyExitLoop();
+ /// Returns true if all loads in the loop contained in \p Loads can be
+ /// analyzed as potentially faulting. Any loads that may fault are added to
+ /// the member variable PotentiallyFaultingPtrs.
+ bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> *Loads);
+
/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
/// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -642,6 +667,10 @@ class LoopVectorizationLegality {
/// Keep track of the loop edge to an uncountable exit, comprising a pair
/// of (Exiting, Exit) blocks, if there is exactly one early exit.
std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge;
+
+ /// Keep a record of all potentially faulting pointers in loops with
+ /// uncountable early exits.
+ SmallVector<std::pair<const SCEV *, Type *>, 4> PotentiallyFaultingPtrs;
};
} // namespace llvm
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index b461c41d29e84..304bdcd1fba25 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -816,18 +816,3 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}
-
-bool llvm::isDereferenceableReadOnlyLoop(
- Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
- SmallVectorImpl<const SCEVPredicate *> *Predicates) {
- for (BasicBlock *BB : L->blocks()) {
- for (Instruction &I : *BB) {
- if (auto *LI = dyn_cast<LoadInst>(&I)) {
- if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
- return false;
- } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
- return false;
- }
- }
- return true;
-}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 420cbc5384ce4..ff3954d556e39 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1602,6 +1602,43 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
return Result;
}
+bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads(
+ SmallVectorImpl<LoadInst *> *Loads) {
+ LLVM_DEBUG(dbgs() << "LV: Looking for potentially faulting loads in loop "
+ "with uncountable early exit:\n");
+ for (LoadInst *LI : *Loads) {
+ LLVM_DEBUG(dbgs() << "LV: Load: " << *LI << '\n');
+ Value *Ptr = LI->getPointerOperand();
+ if (!Ptr)
+ return false;
+ const SCEV *PtrExpr = PSE.getSCEV(Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
+ // TODO: Deal with loop invariant pointers.
+ if (!AR || AR->getLoop() != TheLoop || !AR->isAffine())
+ return false;
+ auto Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*PSE.getSE()));
+ if (!Step)
+ return false;
+ const SCEV *Start = AR->getStart();
+
+ // Make sure the step is positive and matches the object size in memory.
+ // TODO: Extend this to cover more cases.
+ auto &DL = LI->getDataLayout();
+ APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+ DL.getTypeStoreSize(LI->getType()).getFixedValue());
+
+ // Also discard element sizes that are not a power of 2, since the loop
+ // vectorizer can only perform loop versioning with pointer alignment
+ // checks for vector loads that are power-of-2 in size.
+ if (EltSize != Step->getAPInt() || !EltSize.isPowerOf2())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "LV: SCEV for Load Ptr: " << *Start << '\n');
+ PotentiallyFaultingPtrs.push_back({Start, LI->getType()});
+ }
+ return true;
+}
+
bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
BasicBlock *LatchBB = TheLoop->getLoopLatch();
if (!LatchBB) {
@@ -1706,6 +1743,8 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
}
};
+ Predicates.clear();
+ SmallVector<LoadInst *, 4> NonDerefLoads;
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
if (I.mayWriteToMemory()) {
@@ -1715,30 +1754,52 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
"Cannot vectorize early exit loop with writes to memory",
"WritesInEarlyExitLoop", ORE, TheLoop);
return false;
- } else if (!IsSafeOperation(&I)) {
+ } else if (I.mayThrow() || !IsSafeOperation(&I)) {
reportVectorizationFailure("Early exit loop contains operations that "
"cannot be speculatively executed",
"UnsafeOperationsEarlyExitLoop", ORE,
TheLoop);
return false;
+ } else if (I.mayReadFromMemory()) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ bool UnsafeRead = false;
+ if (!LI)
+ UnsafeRead = true;
+ else if (!isDereferenceableAndAlignedInLoop(LI, TheLoop, *PSE.getSE(),
+ *DT, AC, &Predicates)) {
+ if (LI->getParent() != TheLoop->getHeader())
+ UnsafeRead = true;
+ else
+ NonDerefLoads.push_back(LI);
+ }
+
+ if (UnsafeRead) {
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
}
}
+ if (!NonDerefLoads.empty()) {
+ if (!TTI->getMinPageSize() ||
+ !analyzePotentiallyFaultingLoads(&NonDerefLoads)) {
+ PotentiallyFaultingPtrs.clear();
+ reportVectorizationFailure(
+ "Loop may fault",
+ "Cannot vectorize potentially faulting early exit loop",
+ "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "We can vectorize the loop with runtime checks.\n");
+ }
+
// The vectoriser cannot handle loads that occur after the early exit block.
assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first &&
"Expected latch predecessor to be the early exiting block");
- // TODO: Handle loops that may fault.
- Predicates.clear();
- if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
- &Predicates)) {
- reportVectorizationFailure(
- "Loop may fault",
- "Cannot vectorize potentially faulting early exit loop",
- "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
- return false;
- }
-
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
PSE.getSymbolicMaxBackedgeTakenCount();
// Since we have an exact exit count for the latch and the early exit
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e8a5db28ea0a4..fd38fb4e0a42f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -401,6 +401,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<unsigned> MaxNumPotentiallyFaultingPointers(
+ "max-num-faulting-pointers", cl::init(0), cl::Hidden,
+ cl::desc(
+ "The maximum number of potentially faulting pointers we permit when "
+ "vectorizing loops with uncountable exits."));
+
// Likelyhood of bypassing the vectorized loop because assumptions about SCEV
// variables not overflowing do not hold. See `emitSCEVChecks`.
static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
@@ -2163,6 +2169,27 @@ class GeneratedRTChecks {
};
} // namespace
+static void addPointerAlignmentChecks(
+ const SmallVectorImpl<std::pair<const SCEV *, Type *>> *Ptrs, Function *F,
+ PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF,
+ unsigned IC) {
+ ScalarEvolution *SE = PSE.getSE();
+ const DataLayout &DL = SE->getDataLayout();
+
+ for (auto Ptr : *Ptrs) {
+ Type *PtrIntType = DL.getIntPtrType(Ptr.first->getType());
+ APInt EltSize(PtrIntType->getScalarSizeInBits(),
+ DL.getTypeStoreSize(Ptr.second).getFixedValue());
+ const SCEV *Start = SE->getPtrToIntExpr(Ptr.first, PtrIntType);
+ const SCEV *ScevEC = SE->getElementCount(PtrIntType, VF * IC);
+ const SCEV *Align =
+ SE->getMulExpr(ScevEC, SE->getConstant(EltSize),
+ (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
+ const SCEV *Rem = SE->getURemExpr(Start, Align);
+ PSE.addPredicate(*(SE->getEqualPredicate(Rem, SE->getZero(PtrIntType))));
+ }
+}
+
static bool useActiveLaneMask(TailFoldingStyle Style) {
return Style == TailFoldingStyle::Data ||
Style == TailFoldingStyle::DataAndControlFlow ||
@@ -3842,6 +3869,15 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
return false;
}
+ if (Legal->hasUncountableEarlyExit() &&
+ Legal->getNumPotentiallyFaultingPointers() &&
+ !TTI.isVScaleKnownToBeAPowerOfTwo()) {
+ reportVectorizationInfo("Cannot vectorize potentially faulting early exit "
+ "loop with scalable vectors.",
+ "ScalableVFUnfeasible", ORE, TheLoop);
+ return false;
+ }
+
IsScalableVectorizationAllowed = true;
return true;
}
@@ -10508,11 +10544,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) {
- reportVectorizationFailure("Auto-vectorization of loops with uncountable "
- "early exit is not enabled",
- "UncountableEarlyExitLoopsDisabled", ORE, L);
- return false;
+ if (LVL.hasUncountableEarlyExit()) {
+ if (!EnableEarlyExitVectorization) {
+ reportVectorizationFailure("Auto-vectorization of loops with uncountable "
+ "early exit is not enabled",
+ "UncountableEarlyExitLoopsDisabled", ORE, L);
+ return false;
+ }
+
+ unsigned NumPotentiallyFaultingPointers =
+ LVL.getNumPotentiallyFaultingPointers();
+ if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
+ reportVectorizationFailure("Not worth vectorizing loop with uncountable "
+ "early exit, due to number of potentially "
+ "faulting loads",
+ "UncountableEarlyExitMayFault", ORE, L);
+ return false;
+ } else if (NumPotentiallyFaultingPointers)
+ LLVM_DEBUG(dbgs() << "LV: Need to version early-exit vector loop with "
+ << "pointer alignment checks.\n");
}
// Entrance to the VPlan-native vectorization path. Outer loops are processed
@@ -10663,8 +10713,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if
// they turn out to not be profitable.
- if (VF.Width.isVector() || SelectedIC > 1)
+ if (VF.Width.isVector() || SelectedIC > 1) {
+ if (LVL.getNumPotentiallyFaultingPointers()) {
+ assert(!CM.foldTailWithEVL() &&
+ "Explicit vector length unsupported for early exit loops and "
+ "potentially faulting loads");
+ addPointerAlignmentChecks(LVL.getPotentiallyFaultingPointers(), F, PSE,
+ TTI, VF.Width, SelectedIC);
+ }
Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+ }
// Check if it is profitable to vectorize with runtime checks.
bool ForceVectorization =
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
index 4f7b120643763..2163df92c83ad 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 5
-; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=1 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,MAX1
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=2 \
+; RUN: | FileCheck %s --check-prefixes=CHECK,MAX2
target triple = "aarch64-unknown-linux-gnu"
@@ -10,19 +13,68 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 16
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP12]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 16
+; CHECK-NEXT: [[TMP15:%.*]] = add i64 3, [[N_VEC]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP18]], align 1
+; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 16 x i1> [[TMP19]], splat (i1 true)
+; CHECK-NEXT: [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP20]])
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
+; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
+; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_SPLIT]]:
+; CHECK-NEXT: br i1 [[TMP21]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: br label %[[LOOP_END]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP15]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
; CHECK: [[LOOP_INC]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -47,29 +99,121 @@ loop.end:
define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 {
-; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4
-; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4
-; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
-; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
-; CHECK: [[LOOP_INC]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
-; CHECK-NEXT: ret i64 [[RETVAL]]
+; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; MAX1-SAME: ) #[[ATTR0]] {
+; MAX1-NEXT: [[ENTRY:.*]]:
+; MAX1-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4
+; MAX1-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4
+; MAX1-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; MAX1-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; MAX1-NEXT: br label %[[LOOP:.*]]
+; MAX1: [[LOOP]]:
+; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; MAX1: [[LOOP_INC]]:
+; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; MAX1: [[LOOP_END]]:
+; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
+; MAX1-NEXT: ret i64 [[RETVAL]]
+;
+; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; MAX2-SAME: ) #[[ATTR0]] {
+; MAX2-NEXT: [[ENTRY:.*]]:
+; MAX2-NEXT: [[P1:%.*]] = alloca [42 x i8], align 4
+; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; MAX2-NEXT: [[P2:%.*]] = alloca [42 x i8], align 4
+; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64
+; MAX2-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
+; MAX2-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
+; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; MAX2: [[VECTOR_SCEVCHECK]]:
+; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3
+; MAX2-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[P11]], 3
+; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3
+; MAX2-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[P22]], 3
+; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]]
+; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]]
+; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4
+; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]]
+; MAX2-NEXT: br i1 true, label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX2: [[VECTOR_PH]]:
+; MAX2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 16
+; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP18]]
+; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; MAX2-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 16
+; MAX2-NEXT: [[TMP21:%.*]] = add i64 3, [[N_VEC]]
+; MAX2-NEXT: [[TMP22:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
+; MAX2-NEXT: [[TMP23:%.*]] = mul <vscale x 16 x i64> [[TMP22]], splat (i64 1)
+; MAX2-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP23]]
+; MAX2-NEXT: [[TMP24:%.*]] = mul i64 1, [[TMP20]]
+; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP24]], i64 0
+; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+; MAX2-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX2: [[VECTOR_BODY]]:
+; MAX2-NEXT: [[INDEX3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT5:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX3]]
+; MAX2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP25]]
+; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP27]], align 1
+; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP25]]
+; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1
+; MAX2-NEXT: [[TMP30:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD4]]
+; MAX2-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], [[TMP20]]
+; MAX2-NEXT: [[TMP31:%.*]] = xor <vscale x 16 x i1> [[TMP30]], splat (i1 true)
+; MAX2-NEXT: [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP31]])
+; MAX2-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC]]
+; MAX2-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; MAX2-NEXT: [[TMP34:%.*]] = or i1 [[TMP32]], [[TMP33]]
+; MAX2-NEXT: br i1 [[TMP34]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; MAX2: [[MIDDLE_SPLIT]]:
+; MAX2-NEXT: br i1 [[TMP32]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX2: [[MIDDLE_BLOCK]]:
+; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX2: [[VECTOR_EARLY_EXIT]]:
+; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP31]], i1 true)
+; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; MAX2-NEXT: br label %[[LOOP_END]]
+; MAX2: [[SCALAR_PH]]:
+; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX2-NEXT: br label %[[LOOP:.*]]
+; MAX2: [[LOOP]]:
+; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX2: [[LOOP_INC]]:
+; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
+; MAX2: [[LOOP_END]]:
+; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
+; MAX2-NEXT: ret i64 [[RETVAL]]
;
entry:
%p1 = alloca [42 x i8]
@@ -99,25 +243,116 @@ loop.end:
define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 {
-; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
-; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
-; CHECK: [[LOOP_INC]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
-; CHECK-NEXT: ret i64 [[RETVAL]]
+; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; MAX1-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] {
+; MAX1-NEXT: [[ENTRY:.*]]:
+; MAX1-NEXT: br label %[[LOOP:.*]]
+; MAX1: [[LOOP]]:
+; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; MAX1: [[LOOP_INC]]:
+; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; MAX1: [[LOOP_END]]:
+; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
+; MAX1-NEXT: ret i64 [[RETVAL]]
+;
+; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; MAX2-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] {
+; MAX2-NEXT: [[ENTRY:.*]]:
+; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64
+; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; MAX2: [[VECTOR_SCEVCHECK]]:
+; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3
+; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3
+; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3
+; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3
+; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]]
+; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]]
+; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4
+; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]]
+; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0
+; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]]
+; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX2: [[VECTOR_PH]]:
+; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16
+; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]]
+; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16
+; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]]
+; MAX2-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
+; MAX2-NEXT: [[TMP24:%.*]] = mul <vscale x 16 x i64> [[TMP23]], splat (i64 1)
+; MAX2-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP24]]
+; MAX2-NEXT: [[TMP25:%.*]] = mul i64 1, [[TMP21]]
+; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP25]], i64 0
+; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+; MAX2-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX2: [[VECTOR_BODY]]:
+; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]]
+; MAX2-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP26]]
+; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
+; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP26]]
+; MAX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP30]], align 1
+; MAX2-NEXT: [[TMP31:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD5]]
+; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]]
+; MAX2-NEXT: [[TMP32:%.*]] = xor <vscale x 16 x i1> [[TMP31]], splat (i1 true)
+; MAX2-NEXT: [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP32]])
+; MAX2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
+; MAX2-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; MAX2-NEXT: [[TMP35:%.*]] = or i1 [[TMP33]], [[TMP34]]
+; MAX2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; MAX2: [[MIDDLE_SPLIT]]:
+; MAX2-NEXT: br i1 [[TMP33]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX2: [[MIDDLE_BLOCK]]:
+; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX2: [[VECTOR_EARLY_EXIT]]:
+; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; MAX2-NEXT: br label %[[LOOP_END]]
+; MAX2: [[SCALAR_PH]]:
+; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX2-NEXT: br label %[[LOOP:.*]]
+; MAX2: [[LOOP]]:
+; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX2: [[LOOP_INC]]:
+; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; MAX2: [[LOOP_END]]:
+; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
+; MAX2-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop
@@ -143,25 +378,116 @@ loop.end:
define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 {
-; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
-; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
-; CHECK: [[LOOP_INC]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
-; CHECK-NEXT: ret i64 [[RETVAL]]
+; MAX1-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; MAX1-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; MAX1-NEXT: [[ENTRY:.*]]:
+; MAX1-NEXT: br label %[[LOOP:.*]]
+; MAX1: [[LOOP]]:
+; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; MAX1: [[LOOP_INC]]:
+; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; MAX1: [[LOOP_END]]:
+; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ]
+; MAX1-NEXT: ret i64 [[RETVAL]]
+;
+; MAX2-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; MAX2-NEXT: [[ENTRY:.*]]:
+; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64
+; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; MAX2: [[VECTOR_SCEVCHECK]]:
+; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3
+; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3
+; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3
+; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3
+; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]]
+; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]]
+; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4
+; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]]
+; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0
+; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]]
+; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX2: [[VECTOR_PH]]:
+; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16
+; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]]
+; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16
+; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]]
+; MAX2-NEXT: [[TMP23:%.*]] = call <vscale x 16 x i64> @llvm.stepvector.nxv16i64()
+; MAX2-NEXT: [[TMP24:%.*]] = mul <vscale x 16 x i64> [[TMP23]], splat (i64 1)
+; MAX2-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i64> splat (i64 3), [[TMP24]]
+; MAX2-NEXT: [[TMP25:%.*]] = mul i64 1, [[TMP21]]
+; MAX2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i64> poison, i64 [[TMP25]], i64 0
+; MAX2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i64> [[DOTSPLATINSERT]], <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+; MAX2-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX2: [[VECTOR_BODY]]:
+; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 16 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]]
+; MAX2-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP26]]
+; MAX2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
+; MAX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP26]]
+; MAX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP30]], align 1
+; MAX2-NEXT: [[TMP31:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD5]]
+; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]]
+; MAX2-NEXT: [[TMP32:%.*]] = xor <vscale x 16 x i1> [[TMP31]], splat (i1 true)
+; MAX2-NEXT: [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP32]])
+; MAX2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
+; MAX2-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; MAX2-NEXT: [[TMP35:%.*]] = or i1 [[TMP33]], [[TMP34]]
+; MAX2-NEXT: br i1 [[TMP35]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; MAX2: [[MIDDLE_SPLIT]]:
+; MAX2-NEXT: br i1 [[TMP33]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX2: [[MIDDLE_BLOCK]]:
+; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX2: [[VECTOR_EARLY_EXIT]]:
+; MAX2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
+; MAX2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; MAX2-NEXT: br label %[[LOOP_END]]
+; MAX2: [[SCALAR_PH]]:
+; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX2-NEXT: br label %[[LOOP:.*]]
+; MAX2: [[LOOP]]:
+; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX2: [[LOOP_INC]]:
+; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP9:![0-9]+]]
+; MAX2: [[LOOP_END]]:
+; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP]] ], [ 67, %[[LOOP_INC]] ], [ 67, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
+; MAX2-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop
@@ -271,23 +597,105 @@ loop.end:
define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) #1 {
-; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
-; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
-; CHECK: [[LOOP_INC]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
-; CHECK-NEXT: ret i64 [[RETVAL]]
+; MAX1-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; MAX1-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] {
+; MAX1-NEXT: [[ENTRY:.*]]:
+; MAX1-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; MAX1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; MAX1: [[VECTOR_SCEVCHECK]]:
+; MAX1-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i4
+; MAX1-NEXT: [[TMP1:%.*]] = add i4 [[TMP0]], 3
+; MAX1-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i64
+; MAX1-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; MAX1-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX1: [[VECTOR_PH]]:
+; MAX1-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX1: [[VECTOR_BODY]]:
+; MAX1-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ]
+; MAX1-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; MAX1-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; MAX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; MAX1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
+; MAX1-NEXT: [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3)
+; MAX1-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16
+; MAX1-NEXT: [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true)
+; MAX1-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]])
+; MAX1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; MAX1-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; MAX1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; MAX1: [[MIDDLE_SPLIT]]:
+; MAX1-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX1: [[MIDDLE_BLOCK]]:
+; MAX1-NEXT: br i1 true, label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX1: [[VECTOR_EARLY_EXIT]]:
+; MAX1-NEXT: br label %[[LOOP_END]]
+; MAX1: [[SCALAR_PH]]:
+; MAX1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX1-NEXT: br label %[[LOOP:.*]]
+; MAX1: [[LOOP]]:
+; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX1: [[LOOP_INC]]:
+; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
+; MAX1: [[LOOP_END]]:
+; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ]
+; MAX1-NEXT: ret i64 [[RETVAL]]
+;
+; MAX2-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; MAX2-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] {
+; MAX2-NEXT: [[ENTRY:.*]]:
+; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; MAX2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; MAX2: [[VECTOR_SCEVCHECK]]:
+; MAX2-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i4
+; MAX2-NEXT: [[TMP1:%.*]] = add i4 [[TMP0]], 3
+; MAX2-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i64
+; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; MAX2-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX2: [[VECTOR_PH]]:
+; MAX2-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX2: [[VECTOR_BODY]]:
+; MAX2-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; MAX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
+; MAX2-NEXT: [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3)
+; MAX2-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16
+; MAX2-NEXT: [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true)
+; MAX2-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]])
+; MAX2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; MAX2-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; MAX2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; MAX2: [[MIDDLE_SPLIT]]:
+; MAX2-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX2: [[MIDDLE_BLOCK]]:
+; MAX2-NEXT: br i1 true, label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX2: [[VECTOR_EARLY_EXIT]]:
+; MAX2-NEXT: br label %[[LOOP_END]]
+; MAX2: [[SCALAR_PH]]:
+; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX2-NEXT: br label %[[LOOP:.*]]
+; MAX2: [[LOOP]]:
+; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX2: [[LOOP_INC]]:
+; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP11:![0-9]+]]
+; MAX2: [[LOOP_END]]:
+; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ]
+; MAX2-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop
@@ -351,25 +759,106 @@ loop.end:
define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
-; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
-; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
-; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
-; CHECK: [[LOOP_INC]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
-; CHECK: [[LOOP_END]]:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
-; CHECK-NEXT: ret i64 [[RETVAL]]
+; MAX1-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; MAX1-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; MAX1-NEXT: [[ENTRY:.*]]:
+; MAX1-NEXT: br label %[[LOOP:.*]]
+; MAX1: [[LOOP]]:
+; MAX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 3, %[[ENTRY]] ]
+; MAX1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX1-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; MAX1-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX1-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
+; MAX1: [[LOOP_INC]]:
+; MAX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX1-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX1-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
+; MAX1: [[LOOP_END]]:
+; MAX1-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
+; MAX1-NEXT: ret i64 [[RETVAL]]
+;
+; MAX2-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; MAX2-NEXT: [[ENTRY:.*]]:
+; MAX2-NEXT: [[P22:%.*]] = ptrtoint ptr [[P2]] to i64
+; MAX2-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; MAX2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; MAX2-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; MAX2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; MAX2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; MAX2: [[VECTOR_SCEVCHECK]]:
+; MAX2-NEXT: [[TMP3:%.*]] = add i64 [[P11]], 3
+; MAX2-NEXT: [[TMP4:%.*]] = add nuw i64 [[P11]], 3
+; MAX2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; MAX2-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; MAX2-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; MAX2-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; MAX2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; MAX2-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; MAX2-NEXT: [[TMP11:%.*]] = add i64 [[P22]], 3
+; MAX2-NEXT: [[TMP12:%.*]] = add nuw i64 [[P22]], 3
+; MAX2-NEXT: [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]]
+; MAX2-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]]
+; MAX2-NEXT: [[TMP15:%.*]] = shl i64 [[TMP14]], 4
+; MAX2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]]
+; MAX2-NEXT: [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0
+; MAX2-NEXT: [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]]
+; MAX2-NEXT: br i1 [[TMP17]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; MAX2: [[VECTOR_PH]]:
+; MAX2-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 16
+; MAX2-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]]
+; MAX2-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; MAX2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; MAX2-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16
+; MAX2-NEXT: [[TMP22:%.*]] = add i64 3, [[N_VEC]]
+; MAX2-NEXT: br label %[[VECTOR_BODY:.*]]
+; MAX2: [[VECTOR_BODY]]:
+; MAX2-NEXT: [[INDEX4:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VECTOR_BODY]] ]
+; MAX2-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]]
+; MAX2-NEXT: [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 0
+; MAX2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP23]]
+; MAX2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP25]], align 1
+; MAX2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP23]]
+; MAX2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0
+; MAX2-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP27]], align 1
+; MAX2-NEXT: [[TMP28:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD5]]
+; MAX2-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]]
+; MAX2-NEXT: [[TMP29:%.*]] = xor <vscale x 16 x i1> [[TMP28]], splat (i1 true)
+; MAX2-NEXT: [[TMP30:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP29]])
+; MAX2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
+; MAX2-NEXT: [[TMP32:%.*]] = or i1 [[TMP30]], [[TMP31]]
+; MAX2-NEXT: br i1 [[TMP32]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; MAX2: [[MIDDLE_SPLIT]]:
+; MAX2-NEXT: br i1 [[TMP30]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; MAX2: [[MIDDLE_BLOCK]]:
+; MAX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; MAX2-NEXT: br i1 [[CMP_N]], label %[[LOOP_END:.*]], label %[[SCALAR_PH]]
+; MAX2: [[VECTOR_EARLY_EXIT]]:
+; MAX2-NEXT: br label %[[LOOP_END]]
+; MAX2: [[SCALAR_PH]]:
+; MAX2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ], [ 3, %[[VECTOR_SCEVCHECK]] ]
+; MAX2-NEXT: br label %[[LOOP:.*]]
+; MAX2: [[LOOP]]:
+; MAX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; MAX2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; MAX2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; MAX2-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; MAX2-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; MAX2-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]]
+; MAX2: [[LOOP_INC]]:
+; MAX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; MAX2-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; MAX2-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; MAX2: [[LOOP_END]]:
+; MAX2-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, %[[LOOP]] ], [ 0, %[[LOOP_INC]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ 1, %[[VECTOR_EARLY_EXIT]] ]
+; MAX2-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop
@@ -440,3 +929,26 @@ loop.end:
attributes #0 = { "target-features"="+sve" vscale_range(1,16) }
attributes #1 = { "target-features"="+sve" }
+;.
+; MAX1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; MAX1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAX1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAX1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; MAX1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; MAX1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.
+; MAX2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; MAX2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; MAX2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; MAX2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; MAX2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; MAX2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; MAX2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; MAX2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; MAX2: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; MAX2: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
+; MAX2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; MAX2: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
+; MAX2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; MAX2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
index 929cc5b84d922..d2e3cc9cdb018 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -passes=loop-vectorize \
-; RUN: -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f | FileCheck %s
+; RUN: -scalable-vectorization=on -max-num-faulting-pointers=1 -mtriple riscv64-linux-gnu \
+; RUN: -mattr=+v,+f | FileCheck %s
target triple = "riscv64"
@@ -151,19 +152,51 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i5
+; CHECK-NEXT: [[TMP1:%.*]] = add i5 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = zext i5 [[TMP1]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: br label [[LOOP1:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP1]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32
+; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END1:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: br label [[LOOP_END1]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ], [ 3, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END1]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[LOOP_END]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -275,19 +308,51 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p
; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK: vector.scevcheck:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[P11]] to i5
+; CHECK-NEXT: [[TMP1:%.*]] = add i5 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = zext i5 [[TMP1]] to i64
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32
+; CHECK-NEXT: [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.split:
+; CHECK-NEXT: br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[LOOP_END1:%.*]], label [[SCALAR_PH]]
+; CHECK: vector.early.exit:
+; CHECK-NEXT: br label [[LOOP_END1]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ], [ 3, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END1]]
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END1]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[LOOP_END]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -315,7 +380,7 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1
; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK-NEXT: br label [[LOOP1:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
@@ -325,9 +390,9 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1
; CHECK: loop.inc:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
@@ -396,3 +461,11 @@ loop.end:
attributes #0 = { vscale_range(2,1024) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
index 23362004b1b2a..69477d6256491 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=1 | FileCheck %s
declare void @init_mem(ptr, i64);
>From 4884adc0e5a8d3d8c31dd36772b506ae834e56fc Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 24 Feb 2025 10:46:35 +0000
Subject: [PATCH 3/4] Address review comments
---
.../Vectorize/LoopVectorizationLegality.h | 6 +++---
.../Vectorize/LoopVectorizationLegality.cpp | 13 ++++++++-----
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +++---
3 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index b4bf528472c5c..6c0a3846cd946 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -434,9 +434,9 @@ class LoopVectorizationLegality {
/// Return a vector of all potentially faulting pointers in a loop with
/// uncountable early exits.
- const SmallVectorImpl<std::pair<const SCEV *, Type *>> *
+ ArrayRef<std::pair<const SCEV *, Type *>>
getPotentiallyFaultingPointers() const {
- return &PotentiallyFaultingPtrs;
+ return PotentiallyFaultingPtrs;
}
/// Returns a HistogramInfo* for the given instruction if it was determined
@@ -547,7 +547,7 @@ class LoopVectorizationLegality {
/// Returns true if all loads in the loop contained in \p Loads can be
/// analyzed as potentially faulting. Any loads that may fault are added to
/// the member variable PotentiallyFaultingPtrs.
- bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> *Loads);
+ bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> &Loads);
/// Return true if all of the instructions in the block can be speculatively
/// executed, and record the loads/stores that require masking.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ff3954d556e39..daf15646ee45a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1603,17 +1603,20 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
}
bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads(
- SmallVectorImpl<LoadInst *> *Loads) {
+ SmallVectorImpl<LoadInst *> &Loads) {
LLVM_DEBUG(dbgs() << "LV: Looking for potentially faulting loads in loop "
"with uncountable early exit:\n");
- for (LoadInst *LI : *Loads) {
+ for (LoadInst *LI : Loads) {
LLVM_DEBUG(dbgs() << "LV: Load: " << *LI << '\n');
- Value *Ptr = LI->getPointerOperand();
- if (!Ptr)
+ if (LI->getPointerAddressSpace())
return false;
+
+ Value *Ptr = LI->getPointerOperand();
const SCEV *PtrExpr = PSE.getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
// TODO: Deal with loop invariant pointers.
+ // NOTE: The reasoning below is only safe if the load executes at least
+ // once.
if (!AR || AR->getLoop() != TheLoop || !AR->isAffine())
return false;
auto Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*PSE.getSE()));
@@ -1785,7 +1788,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
if (!NonDerefLoads.empty()) {
if (!TTI->getMinPageSize() ||
- !analyzePotentiallyFaultingLoads(&NonDerefLoads)) {
+ !analyzePotentiallyFaultingLoads(NonDerefLoads)) {
PotentiallyFaultingPtrs.clear();
reportVectorizationFailure(
"Loop may fault",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fd38fb4e0a42f..358603289ec2d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2170,13 +2170,13 @@ class GeneratedRTChecks {
} // namespace
static void addPointerAlignmentChecks(
- const SmallVectorImpl<std::pair<const SCEV *, Type *>> *Ptrs, Function *F,
+ ArrayRef<std::pair<const SCEV *, Type *>> Ptrs, Function *F,
PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF,
unsigned IC) {
ScalarEvolution *SE = PSE.getSE();
const DataLayout &DL = SE->getDataLayout();
- for (auto Ptr : *Ptrs) {
+ for (auto Ptr : Ptrs) {
Type *PtrIntType = DL.getIntPtrType(Ptr.first->getType());
APInt EltSize(PtrIntType->getScalarSizeInBits(),
DL.getTypeStoreSize(Ptr.second).getFixedValue());
@@ -2186,7 +2186,7 @@ static void addPointerAlignmentChecks(
SE->getMulExpr(ScevEC, SE->getConstant(EltSize),
(SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
const SCEV *Rem = SE->getURemExpr(Start, Align);
- PSE.addPredicate(*(SE->getEqualPredicate(Rem, SE->getZero(PtrIntType))));
+ PSE.addPredicate(*SE->getEqualPredicate(Rem, SE->getZero(PtrIntType)));
}
}
>From 10d7a812e4db0bf88240baf5570f6756aaf3085b Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 24 Feb 2025 11:02:38 +0000
Subject: [PATCH 4/4] Fix formatting issues
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 358603289ec2d..7f9ffffc16efe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2169,10 +2169,11 @@ class GeneratedRTChecks {
};
} // namespace
-static void addPointerAlignmentChecks(
- ArrayRef<std::pair<const SCEV *, Type *>> Ptrs, Function *F,
- PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI, ElementCount VF,
- unsigned IC) {
+static void
+addPointerAlignmentChecks(ArrayRef<std::pair<const SCEV *, Type *>> Ptrs,
+ Function *F, PredicatedScalarEvolution &PSE,
+ TargetTransformInfo *TTI, ElementCount VF,
+ unsigned IC) {
ScalarEvolution *SE = PSE.getSE();
const DataLayout &DL = SE->getDataLayout();
More information about the llvm-commits
mailing list