[llvm] [WIP][LoopVectorize] Perform loop versioning for some early exit loops (PR #120603)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 05:19:56 PST 2025


https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/120603

>From 773f432b1f272412265db117c7a5cbafe2a965de Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 13 Jan 2025 13:17:51 +0000
Subject: [PATCH 1/2] Add tests

---
 .../AArch64/single_early_exit_unsafe_ptrs.ll  | 398 ++++++++++++++++++
 .../RISCV/single_early_exit_unsafe_ptrs.ll    | 398 ++++++++++++++++++
 .../single_early_exit_unsafe_ptrs.ll          |  42 +-
 3 files changed, 837 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
new file mode 100644
index 00000000000000..be66c1e7bf68e2
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @init_mem(ptr, i64);
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = alloca [42 x i8], align 4
+; CHECK-NEXT:    [[P2:%.*]] = alloca [42 x i8], align 4
+; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  %p1 = alloca [42 x i8]
+  %p2 = alloca [42 x i8]
+  call void @init_mem(ptr %p1, i64 1024)
+  call void @init_mem(ptr %p2, i64 1024)
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %stride2 = mul i64 %index, 2
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %mul = mul i64 %index, %stride
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) #1 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i32, ptr %arrayidx, align 4
+  %cmp3 = icmp eq i32 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx2, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+attributes #0 = { "target-features"="+sve" vscale_range(1,16) }
+attributes #1 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
new file mode 100644
index 00000000000000..929cc5b84d9222
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
@@ -0,0 +1,398 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -passes=loop-vectorize \
+; RUN:   -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f | FileCheck %s
+
+target triple = "riscv64"
+
+declare void @init_mem(ptr, i64);
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_allocas() #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P1:%.*]] = alloca [42 x i8], align 1
+; CHECK-NEXT:    [[P2:%.*]] = alloca [42 x i8], align 1
+; CHECK-NEXT:    call void @init_mem(ptr [[P1]], i64 1024)
+; CHECK-NEXT:    call void @init_mem(ptr [[P2]], i64 1024)
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  %p1 = alloca [42 x i8]
+  %p2 = alloca [42 x i8]
+  call void @init_mem(ptr %p1, i64 1024)
+  call void @init_mem(ptr %p2, i64 1024)
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(
+; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx1, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride2(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[STRIDE2:%.*]] = mul i64 [[INDEX]], 2
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[STRIDE2]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %stride2 = mul i64 %index, 2
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %stride2
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(ptr %p1, i64 %stride) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_unknown(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[INDEX]], [[STRIDE]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[MUL]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %mul = mul i64 %index, %stride
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %mul
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p1, i64 %stride) {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
+; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
+; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i32, ptr %arrayidx, align 4
+  %cmp3 = icmp eq i32 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %arrayidx2 = getelementptr inbounds i8, ptr %p2, i64 %index
+  %ld2 = load i8, ptr %arrayidx2, align 1
+  %cmp3 = icmp eq i8 %ld1, %ld2
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}
+
+
+attributes #0 = { vscale_range(2,1024) }
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
index c68eeac19c9ecf..23362004b1b2a1 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S < %s -p loop-vectorize | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
 
 declare void @init_mem(ptr, i64);
 
@@ -141,3 +141,43 @@ loop.end:
   %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ]
   ret i64 %retval
 }
+
+
+define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) {
+; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
+; CHECK-SAME: ptr [[P1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK:       loop.inc:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK:       loop.end:
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    ret i64 [[RETVAL]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ]
+  %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index
+  %ld1 = load i8, ptr %arrayidx, align 1
+  %cmp3 = icmp eq i8 %ld1, 3
+  br i1 %cmp3, label %loop.inc, label %loop.end
+
+loop.inc:
+  %index.next = add i64 %index, 1
+  %exitcond = icmp ne i64 %index.next, 67
+  br i1 %exitcond, label %loop, label %loop.end
+
+loop.end:
+  %retval = phi i64 [ 1, %loop ], [ 0, %loop.inc ]
+  ret i64 %retval
+}

>From ca000fbaa6ae5d9a6e60db891aa96b18e3185acf Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Mon, 13 Jan 2025 13:18:00 +0000
Subject: [PATCH 2/2] [LoopVectorize] Perform loop versioning for some early
 exit loops

When attempting to vectorise a loop with an uncountable early
exit, we attempt to discover if all the loads in the loop are
known to be dereferenceable. If at least one load could
potentially fault then we abandon vectorisation. This patch
adds support for vectorising loops with one potentially
faulting load by versioning the loop based on the load
pointer alignment. It is required that the vector load must
always fault on the first lane, i.e. the load should not
straddle a page boundary. Doing so ensures that the behaviour
of the vector and scalar loops is identical, i.e. if a load
does fault it will fault at the same scalar iteration.
Such vectorisation depends on the following conditions being
met:

1. The max vector width must not exceed the minimum page size.
This is done by adding a getMaxSafeVectorWidthInBits
wrapper that checks if we have an uncountable early exit.
For scalable vectors we must be able to determine the maximum
possible value of vscale.
2. The size of the loaded type must be a power of 2. This is
checked during legalisation.
3. The VF must be a power of two (so that the vector width can
divide wholly into the page size which is also power of 2).
For fixed-width vectors this is always true, and for scalable
vectors we query the TTI hook isVScaleKnownToBeAPowerOfTwo.
If the effective runtime VF could change during the loop then
this cannot be vectorised via loop versioning.
4. The load pointer must be aligned to a multiple of the vector
width. (NOTE: interleaving is currently disabled for these early
exit loops.) We add a runtime check to ensure this is true.
---
 llvm/include/llvm/Analysis/Loads.h            |   3 +-
 .../Vectorize/LoopVectorizationLegality.h     |  22 ++
 llvm/lib/Analysis/Loads.cpp                   |  21 +-
 .../Vectorize/LoopVectorizationLegality.cpp   |  56 ++++-
 .../Transforms/Vectorize/LoopVectorize.cpp    | 122 +++++++++--
 .../AArch64/single_early_exit_unsafe_ptrs.ll  | 204 +++++++++++++++---
 .../RISCV/single_early_exit_unsafe_ptrs.ll    |  90 +++++++-
 7 files changed, 444 insertions(+), 74 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 639070c07897b0..828f64e0e59432 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -92,7 +92,8 @@ bool isDereferenceableAndAlignedInLoop(
 /// contains read-only memory accesses.
 bool isDereferenceableReadOnlyLoop(
     Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
+    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr,
+    SmallVectorImpl<LoadInst *> *NonDerefLoads = nullptr);
 
 /// Return true if we know that executing a load from this value cannot trap.
 ///
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 72fda911962ad2..247e4ef030b270 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -429,6 +429,19 @@ class LoopVectorizationLegality {
   unsigned getNumStores() const { return LAI->getNumStores(); }
   unsigned getNumLoads() const { return LAI->getNumLoads(); }
 
+  /// Return the number of loads in the loop that could potentially fault in a
+  /// loop with uncountable early exits.
+  unsigned getNumPotentiallyFaultingLoads() const {
+    return PotentiallyFaultingLoads.size();
+  }
+
+  /// Return a vector of all potentially faulting loads in a loop with
+  /// uncountable early exits.
+  const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *
+  getPotentiallyFaultingLoads() const {
+    return &PotentiallyFaultingLoads;
+  }
+
   /// Returns a HistogramInfo* for the given instruction if it was determined
   /// to be part of a load -> update -> store sequence where multiple lanes
   /// may be working on the same memory address.
@@ -537,6 +550,11 @@ class LoopVectorizationLegality {
   /// additional cases safely.
   bool isVectorizableEarlyExitLoop();
 
+  /// Returns true if all loads in the loop contained in \p Loads can be
+  /// analyzed as potentially faulting. Any loads that may fault are added to
+  /// the member variable PotentiallyFaultingLoads.
+  bool analyzePotentiallyFaultingLoads(SmallVectorImpl<LoadInst *> *Loads);
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed, and record the loads/stores that require masking.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
@@ -666,6 +684,10 @@ class LoopVectorizationLegality {
   /// Keep track of the destinations of all uncountable exits if the
   /// exact backedge taken count is not computable.
   SmallVector<BasicBlock *, 4> UncountableExitBlocks;
+
+  /// Keep a record of all potentially faulting loads in loops with
+  /// uncountable early exits.
+  SmallVector<std::pair<LoadInst *, const SCEV *>, 4> PotentiallyFaultingLoads;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index bc03e4052a705b..d766058e5c0d7f 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -805,15 +805,26 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
 
 bool llvm::isDereferenceableReadOnlyLoop(
     Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-    SmallVectorImpl<const SCEVPredicate *> *Predicates) {
+    SmallVectorImpl<const SCEVPredicate *> *Predicates,
+    SmallVectorImpl<LoadInst *> *NonDerefLoads) {
+  bool Result = true;
   for (BasicBlock *BB : L->blocks()) {
     for (Instruction &I : *BB) {
       if (auto *LI = dyn_cast<LoadInst>(&I)) {
-        if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
+        if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC,
+                                               Predicates)) {
+          if (!NonDerefLoads)
+            return false;
+          NonDerefLoads->push_back(LI);
+          Result = false;
+        }
+      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
+                 I.mayThrow()) {
+        if (!NonDerefLoads)
           return false;
-      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
-        return false;
+        Result = false;
+      }
     }
   }
-  return true;
+  return Result;
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 406864a6793dc8..fdc8bd39524557 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1609,6 +1609,43 @@ bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   return Result;
 }
 
+bool LoopVectorizationLegality::analyzePotentiallyFaultingLoads(
+    SmallVectorImpl<LoadInst *> *Loads) {
+  LLVM_DEBUG(dbgs() << "LV: Looking for potentially faulting loads in loop "
+                       "with uncountable early exit:\n");
+  for (LoadInst *LI : *Loads) {
+    LLVM_DEBUG(dbgs() << "LV:   Load: " << *LI << '\n');
+    Value *Ptr = LI->getPointerOperand();
+    if (!Ptr)
+      return false;
+    const SCEV *PtrExpr = PSE.getSCEV(Ptr);
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
+    // TODO: Deal with loop invariant pointers.
+    if (!AR || AR->getLoop() != TheLoop || !AR->isAffine())
+      return false;
+    auto Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*PSE.getSE()));
+    if (!Step)
+      return false;
+    const SCEV *Start = AR->getStart();
+
+    // Make sure the step is positive and matches the object size in memory.
+    // TODO: Extend this to cover more cases.
+    auto &DL = LI->getDataLayout();
+    APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+                  DL.getTypeStoreSize(LI->getType()).getFixedValue());
+
+    // Also discard element sizes that are not a power of 2, since the loop
+    // vectorizer can only perform loop versioning with pointer alignment
+    // checks for vector loads that are power-of-2 in size.
+    if (EltSize != Step->getAPInt() || !EltSize.isPowerOf2())
+      return false;
+
+    LLVM_DEBUG(dbgs() << "LV: SCEV for Load Ptr: " << *Start << '\n');
+    PotentiallyFaultingLoads.push_back({LI, Start});
+  }
+  return true;
+}
+
 bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   BasicBlock *LatchBB = TheLoop->getLoopLatch();
   if (!LatchBB) {
@@ -1731,15 +1768,18 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   assert(LatchBB->getUniquePredecessor() == getUncountableEarlyExitingBlock() &&
          "Expected latch predecessor to be the early exiting block");
 
-  // TODO: Handle loops that may fault.
   Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
-    reportVectorizationFailure(
-        "Loop may fault",
-        "Cannot vectorize potentially faulting early exit loop",
-        "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
-    return false;
+  SmallVector<LoadInst *, 4> Loads;
+  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, &Predicates,
+                                     &Loads)) {
+    if (!TTI->getMinPageSize() || !analyzePotentiallyFaultingLoads(&Loads)) {
+      reportVectorizationFailure(
+          "Loop may fault",
+          "Cannot vectorize potentially faulting early exit loop",
+          "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << "We can vectorize the loop with runtime checks.\n");
   }
 
   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b017b61a45a0c3..b827792eeeddcc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -390,6 +390,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
     cl::desc(
         "Enable vectorization of early exit loops with uncountable exits."));
 
+static cl::opt<unsigned> MaxNumPotentiallyFaultingPointers(
+    "max-num-faulting-pointers", cl::init(1), cl::Hidden,
+    cl::desc(
+        "The maximum number of potentially faulting pointers we permit when "
+        "vectorizing loops with uncountable exits."));
+
 // Likelyhood of bypassing the vectorized loop because assumptions about SCEV
 // variables not overflowing do not hold. See `emitSCEVChecks`.
 static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
@@ -1582,6 +1588,22 @@ class LoopVectorizationCostModel {
                                        ElementCount MaxSafeVF,
                                        bool FoldTailByMasking);
 
+  bool isSafeForAnyVectorWidth() const {
+    return Legal->isSafeForAnyVectorWidth() &&
+           (!Legal->hasUncountableEarlyExit() ||
+            !Legal->getNumPotentiallyFaultingLoads());
+  }
+
+  uint64_t getMaxSafeVectorWidthInBits() const {
+    uint64_t MaxSafeVectorWidth = Legal->getMaxSafeVectorWidthInBits();
+    // The legalizer bails out if getMinPageSize does not return a value.
+    if (Legal->hasUncountableEarlyExit() &&
+        Legal->getNumPotentiallyFaultingLoads())
+      MaxSafeVectorWidth =
+          std::min(MaxSafeVectorWidth, uint64_t(*TTI.getMinPageSize()) * 8);
+    return MaxSafeVectorWidth;
+  }
+
   /// Checks if scalable vectorization is supported and enabled. Caches the
   /// result to avoid repeated debug dumps for repeated queries.
   bool isScalableVectorizationAllowed();
@@ -2123,6 +2145,41 @@ class GeneratedRTChecks {
 };
 } // namespace
 
+std::optional<unsigned> getMaxVScale(const Function &F,
+                                     const TargetTransformInfo &TTI) {
+  if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
+    return MaxVScale;
+
+  if (F.hasFnAttribute(Attribute::VScaleRange))
+    return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
+
+  return std::nullopt;
+}
+
+static void addPointerAlignmentChecks(
+    const SmallVectorImpl<std::pair<LoadInst *, const SCEV *>> *Loads,
+    Function *F, PredicatedScalarEvolution &PSE, TargetTransformInfo *TTI,
+    ElementCount VF) {
+  ScalarEvolution *SE = PSE.getSE();
+  const DataLayout &DL = SE->getDataLayout();
+  Type *PtrIntType = DL.getIntPtrType(SE->getContext());
+
+  const SCEV *Zero = SE->getZero(PtrIntType);
+  const SCEV *ScevEC = SE->getElementCount(PtrIntType, VF);
+
+  for (auto Load : *Loads) {
+    APInt EltSize(
+        DL.getIndexTypeSizeInBits(Load.first->getPointerOperandType()),
+        DL.getTypeStoreSize(Load.first->getType()).getFixedValue());
+    const SCEV *Start = SE->getPtrToIntExpr(Load.second, PtrIntType);
+    const SCEV *Align =
+        SE->getMulExpr(ScevEC, SE->getConstant(EltSize),
+                       (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW));
+    const SCEV *Rem = SE->getURemExpr(Start, Align);
+    PSE.addPredicate(*(SE->getEqualPredicate(Rem, Zero)));
+  }
+}
+
 static bool useActiveLaneMask(TailFoldingStyle Style) {
   return Style == TailFoldingStyle::Data ||
          Style == TailFoldingStyle::DataAndControlFlow ||
@@ -2292,17 +2349,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue,
   llvm_unreachable("invalid enum");
 }
 
-std::optional<unsigned> getMaxVScale(const Function &F,
-                                     const TargetTransformInfo &TTI) {
-  if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
-    return MaxVScale;
-
-  if (F.hasFnAttribute(Attribute::VScaleRange))
-    return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
-
-  return std::nullopt;
-}
-
 /// For the given VF and UF and maximum trip count computed for the loop, return
 /// whether the induction variable might overflow in the vectorized loop. If not,
 /// then we know a runtime overflow check always evaluates to false and can be
@@ -3899,13 +3945,22 @@ bool LoopVectorizationCostModel::isScalableVectorizationAllowed() {
     return false;
   }
 
-  if (!Legal->isSafeForAnyVectorWidth() && !getMaxVScale(*TheFunction, TTI)) {
+  if (!isSafeForAnyVectorWidth() && !getMaxVScale(*TheFunction, TTI)) {
     reportVectorizationInfo("The target does not provide maximum vscale value "
                             "for safe distance analysis.",
                             "ScalableVFUnfeasible", ORE, TheLoop);
     return false;
   }
 
+  if (Legal->hasUncountableEarlyExit() &&
+      Legal->getNumPotentiallyFaultingLoads() &&
+      !TTI.isVScaleKnownToBeAPowerOfTwo()) {
+    reportVectorizationInfo("Cannot vectorize potentially faulting early exit "
+                            "loop with scalable vectors.",
+                            "ScalableVFUnfeasible", ORE, TheLoop);
+    return false;
+  }
+
   IsScalableVectorizationAllowed = true;
   return true;
 }
@@ -3917,7 +3972,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
 
   auto MaxScalableVF = ElementCount::getScalable(
       std::numeric_limits<ElementCount::ScalarTy>::max());
-  if (Legal->isSafeForAnyVectorWidth())
+  if (isSafeForAnyVectorWidth())
     return MaxScalableVF;
 
   std::optional<unsigned> MaxVScale = getMaxVScale(*TheFunction, TTI);
@@ -3944,11 +3999,11 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
   // the memory accesses that is most restrictive (involved in the smallest
   // dependence distance).
   unsigned MaxSafeElements =
-      llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
+      llvm::bit_floor(getMaxSafeVectorWidthInBits() / WidestType);
 
   auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
   auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
-  if (!Legal->isSafeForAnyVectorWidth())
+  if (!isSafeForAnyVectorWidth())
     this->MaxSafeElements = MaxSafeElements;
 
   LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF
@@ -10346,11 +10401,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     return false;
   }
 
-  if (LVL.hasUncountableEarlyExit() && !EnableEarlyExitVectorization) {
-    reportVectorizationFailure("Auto-vectorization of loops with uncountable "
-                               "early exit is not enabled",
-                               "UncountableEarlyExitLoopsDisabled", ORE, L);
-    return false;
+  if (LVL.hasUncountableEarlyExit()) {
+    if (!EnableEarlyExitVectorization) {
+      reportVectorizationFailure("Auto-vectorization of loops with uncountable "
+                                 "early exit is not enabled",
+                                 "UncountableEarlyExitLoopsDisabled", ORE, L);
+      return false;
+    }
+
+    unsigned NumPotentiallyFaultingPointers =
+        LVL.getNumPotentiallyFaultingLoads();
+    if (NumPotentiallyFaultingPointers > MaxNumPotentiallyFaultingPointers) {
+      reportVectorizationFailure("Not worth vectorizing loop with uncountable "
+                                 "early exit, due to number of potentially "
+                                 "faulting loads",
+                                 "UncountableEarlyExitMayFault", ORE, L);
+      return false;
+    } else if (NumPotentiallyFaultingPointers)
+      LLVM_DEBUG(dbgs() << "LV: Need to version early-exit vector loop with "
+                        << "pointer alignment checks.\n");
   }
 
   if (LVL.hasStructVectorCall()) {
@@ -10508,8 +10577,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     unsigned SelectedIC = std::max(IC, UserIC);
     //  Optimistically generate runtime checks if they are needed. Drop them if
     //  they turn out to not be profitable.
-    if (VF.Width.isVector() || SelectedIC > 1)
+    if (VF.Width.isVector() || SelectedIC > 1) {
+      if (LVL.getNumPotentiallyFaultingLoads()) {
+        assert(SelectedIC == 1 &&
+               "Interleaving not supported for early exit loops and "
+               "potentially faulting loads");
+        assert(!CM.foldTailWithEVL() &&
+               "Explicit vector length unsupported for early exit loops and "
+               "potentially faulting loads");
+        addPointerAlignmentChecks(LVL.getPotentiallyFaultingLoads(), F, PSE,
+                                  TTI, VF.Width);
+      }
       Checks.create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+    }
 
     // Check if it is profitable to vectorize with runtime checks.
     bool ForceVectorization =
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
index be66c1e7bf68e2..04b40883913c47 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/single_early_exit_unsafe_ptrs.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization | FileCheck %s
+; RUN: opt -S < %s -p loop-vectorize -enable-early-exit-vectorization -max-num-faulting-pointers=2 \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-MAX2
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -150,19 +152,66 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
 ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
 ; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK-NEXT:    [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[P11]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw i64 [[P11]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 16
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 64, [[TMP12]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 16
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 3, [[N_VEC]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP18]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], [[TMP14]]
+; CHECK-NEXT:    [[TMP20:%.*]] = xor <vscale x 16 x i1> [[TMP19]], splat (i1 true)
+; CHECK-NEXT:    [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP20]])
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
+; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
+; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.split:
+; CHECK-NEXT:    br i1 [[TMP21]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 3, [[VECTOR_SCEVCHECK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
 ; CHECK:       loop.inc:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[MIDDLE_SPLIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -274,19 +323,49 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p
 ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
 ; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[P11]] to i4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i4 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i4 [[TMP1]] to i64
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 16
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP7]])
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.split:
+; CHECK-NEXT:    br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[VECTOR_SCEVCHECK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP1:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
 ; CHECK:       loop.inc:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[MIDDLE_SPLIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -351,25 +430,84 @@ loop.end:
 
 
 define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(ptr %p1, ptr %p2) #0 {
-; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
-; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; CHECK-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
-; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
-; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
-; CHECK:       loop.inc:
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
-; CHECK:       loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
-; CHECK-NEXT:    ret i64 [[RETVAL]]
+; CHECK-MAX2-LABEL: define i64 @same_exit_block_no_live_outs_two_faulting_ptrs(
+; CHECK-MAX2-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-MAX2-NEXT:  entry:
+; CHECK-MAX2-NEXT:    [[P22:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-MAX2-NEXT:    [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-MAX2-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAX2-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; CHECK-MAX2-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
+; CHECK-MAX2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP2]]
+; CHECK-MAX2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK-MAX2:       vector.scevcheck:
+; CHECK-MAX2-NEXT:    [[TMP3:%.*]] = add i64 [[P11]], 3
+; CHECK-MAX2-NEXT:    [[TMP4:%.*]] = add nuw i64 [[P11]], 3
+; CHECK-MAX2-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAX2-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 4
+; CHECK-MAX2-NEXT:    [[TMP7:%.*]] = udiv i64 [[TMP4]], [[TMP6]]
+; CHECK-MAX2-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], [[TMP5]]
+; CHECK-MAX2-NEXT:    [[TMP9:%.*]] = shl i64 [[TMP8]], 4
+; CHECK-MAX2-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP3]], [[TMP9]]
+; CHECK-MAX2-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-MAX2-NEXT:    [[TMP11:%.*]] = add i64 [[P22]], 3
+; CHECK-MAX2-NEXT:    [[TMP12:%.*]] = add nuw i64 [[P22]], 3
+; CHECK-MAX2-NEXT:    [[TMP13:%.*]] = udiv i64 [[TMP12]], [[TMP6]]
+; CHECK-MAX2-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP5]]
+; CHECK-MAX2-NEXT:    [[TMP15:%.*]] = shl i64 [[TMP14]], 4
+; CHECK-MAX2-NEXT:    [[TMP16:%.*]] = sub i64 [[TMP11]], [[TMP15]]
+; CHECK-MAX2-NEXT:    [[IDENT_CHECK3:%.*]] = icmp ne i64 [[TMP16]], 0
+; CHECK-MAX2-NEXT:    [[TMP17:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK3]]
+; CHECK-MAX2-NEXT:    br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-MAX2:       vector.ph:
+; CHECK-MAX2-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAX2-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 16
+; CHECK-MAX2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 64, [[TMP19]]
+; CHECK-MAX2-NEXT:    [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]]
+; CHECK-MAX2-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-MAX2-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP20]], 16
+; CHECK-MAX2-NEXT:    [[TMP22:%.*]] = add i64 3, [[N_VEC]]
+; CHECK-MAX2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-MAX2:       vector.body:
+; CHECK-MAX2-NEXT:    [[INDEX4:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-MAX2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX4]]
+; CHECK-MAX2-NEXT:    [[TMP23:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-MAX2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP23]]
+; CHECK-MAX2-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP24]], i32 0
+; CHECK-MAX2-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP25]], align 1
+; CHECK-MAX2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[TMP23]]
+; CHECK-MAX2-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 0
+; CHECK-MAX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP27]], align 1
+; CHECK-MAX2-NEXT:    [[TMP28:%.*]] = icmp eq <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD5]]
+; CHECK-MAX2-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], [[TMP21]]
+; CHECK-MAX2-NEXT:    [[TMP29:%.*]] = xor <vscale x 16 x i1> [[TMP28]], splat (i1 true)
+; CHECK-MAX2-NEXT:    [[TMP30:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP29]])
+; CHECK-MAX2-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
+; CHECK-MAX2-NEXT:    [[TMP32:%.*]] = or i1 [[TMP30]], [[TMP31]]
+; CHECK-MAX2-NEXT:    br i1 [[TMP32]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-MAX2:       middle.split:
+; CHECK-MAX2-NEXT:    br i1 [[TMP30]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-MAX2:       middle.block:
+; CHECK-MAX2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
+; CHECK-MAX2-NEXT:    br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK-MAX2:       scalar.ph:
+; CHECK-MAX2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 3, [[VECTOR_SCEVCHECK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-MAX2-NEXT:    br label [[LOOP:%.*]]
+; CHECK-MAX2:       loop:
+; CHECK-MAX2-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-MAX2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
+; CHECK-MAX2-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-MAX2-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
+; CHECK-MAX2-NEXT:    [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+; CHECK-MAX2-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
+; CHECK-MAX2-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
+; CHECK-MAX2:       loop.inc:
+; CHECK-MAX2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
+; CHECK-MAX2-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
+; CHECK-MAX2-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-MAX2:       loop.end:
+; CHECK-MAX2-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[MIDDLE_SPLIT]] ]
+; CHECK-MAX2-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
   br label %loop
@@ -396,3 +534,13 @@ loop.end:
 
 attributes #0 = { "target-features"="+sve" vscale_range(1,16) }
 attributes #1 = { "target-features"="+sve" }
+;.
+; CHECK-MAX2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-MAX2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-MAX2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-MAX2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK-MAX2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-MAX2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK-MAX2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK-MAX2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
index 929cc5b84d9222..f426eb6a82437e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/single_early_exit_unsafe_ptrs.ll
@@ -151,19 +151,49 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr(ptr %p1) #0 {
 ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr(
 ; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[P11]] to i5
+; CHECK-NEXT:    [[TMP1:%.*]] = add i5 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i5 [[TMP1]] to i64
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[LOOP1:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP1]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]])
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.split:
+; CHECK-NEXT:    br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[VECTOR_SCEVCHECK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
 ; CHECK:       loop.inc:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[MIDDLE_SPLIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -275,19 +305,49 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(ptr %p
 ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_no_vscale_range(
 ; CHECK-SAME: ptr [[P1:%.*]], i64 [[STRIDE:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[P11:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[P11]] to i5
+; CHECK-NEXT:    [[TMP1:%.*]] = add i5 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i5 [[TMP1]] to i64
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD]], splat (i8 3)
+; CHECK-NEXT:    [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 32
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <32 x i1> [[TMP6]], splat (i1 true)
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP7]])
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.split:
+; CHECK-NEXT:    br i1 [[TMP8]], label [[LOOP_END:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[VECTOR_SCEVCHECK]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP1:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3
-; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]]
+; CHECK-NEXT:    br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]]
 ; CHECK:       loop.inc:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ], [ 0, [[MIDDLE_BLOCK]] ], [ 1, [[MIDDLE_SPLIT]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -315,7 +375,7 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1
 ; CHECK-LABEL: define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(
 ; CHECK-SAME: ptr [[P1:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK-NEXT:    br label [[LOOP1:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
@@ -325,9 +385,9 @@ define i64 @same_exit_block_no_live_outs_one_faulting_ptr_stride_too_low(ptr %p1
 ; CHECK:       loop.inc:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]]
 ; CHECK:       loop.end:
-; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP]] ], [ 0, [[LOOP_INC]] ]
+; CHECK-NEXT:    [[RETVAL:%.*]] = phi i64 [ 1, [[LOOP1]] ], [ 0, [[LOOP_INC]] ]
 ; CHECK-NEXT:    ret i64 [[RETVAL]]
 ;
 entry:
@@ -396,3 +456,11 @@ loop.end:
 
 
 attributes #0 = { vscale_range(2,1024) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+;.



More information about the llvm-commits mailing list