[llvm] [LoopVectorize] LLVM fails to vectorise loops with multi-bool variables (PR #89226)

Dinar Temirbulatov via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 15 04:49:16 PDT 2024


https://github.com/dtemirbulatov updated https://github.com/llvm/llvm-project/pull/89226

>From de0cd170bff29d7f9a7e60f8bcfc10438925d33f Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Fri, 3 May 2024 13:29:13 +0000
Subject: [PATCH 1/8] Original version of
 llvm/test/Transforms/LoopVectorize/multicmp.ll

---
 .../test/Transforms/LoopVectorize/multicmp.ll | 1028 +++++++++++++++++
 1 file changed, 1028 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/multicmp.ll

diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
new file mode 100644
index 0000000000000..538b62ec06a33
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -0,0 +1,1028 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC2
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1-IC2
+
+
+; int multi_user_cmp(float* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   for (long long i = 0; i < n; i++) {
+;     if (a[i] < 0.0f) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;   return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    ret i32 [[TMP10]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+;int multi_user_cmp_int(int* a, long long n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  for (long long i = 0; i < n; i++) {
+;    if (a[i] < 0) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
+define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    ret i32 [[TMP10]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %load1, 0
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; int multi_user_cmp_branch_use(float* a, int *b, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] < 0.0f;
+;     if (c) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;     if (c)
+;       b[i]++;
+;   }
+;   return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[IF_END6]]
+; CHECK:       if.end6:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
+; CHECK-NEXT:    ret i32 [[TMP34]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK-VF4-IC2:       if.then3:
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-VF4-IC2-NEXT:    br label [[IF_END6]]
+; CHECK-VF4-IC2:       if.end6:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP62:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP63:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP62]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP63]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[CMP1]], i1 true, i1 [[VEC_PHI4]]
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[CMP1]], i1 [[VEC_PHI2]], i1 false
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       if.then3:
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-VF1-IC2-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       if.end6:
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[TMP10]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[TMP12]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP22]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP23]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end6 ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %if.end6 ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %if.end6 ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  br i1 %cmp1, label %if.then3, label %if.end6
+
+if.then3:
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %load2 = load i32, ptr %arrayidx5, align 4
+  %inc = add nsw i32 %load2, 1
+  store i32 %inc, ptr %arrayidx5, align 4
+  br label %if.end6
+
+if.end6:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; int multi_user_cmp_branch_use_and_outside_bb_use(float* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   _Bool c;
+;   for (long long i = 0; i < n; i++) {
+;     c = a[i] < 0.0f;
+;     if (c) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;   return all ? c : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP10]], i32 [[TMP11]]
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP16]], i32 [[TMP17]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP18]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP13]], i32 [[TMP14]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP15]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = zext i1 %cmp1 to i32
+  %1 = select i1 %.any.0.off0, i32 2, i32 3
+  %2 = select i1 %all.0.off0., i32 %0, i32 %1
+  ret i32 %2
+}
+
+; Currently, this test-case is not supported.
+; int multi_user_cmp_fmax(float* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   float max = -INFINITY;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] > max;
+;     if (c) {
+;       max = a[i];
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;   return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_fmax(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %max.015 = phi float [ 0xFFF0000000000000, %entry ], [ %.max.0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp ogt float %load1, %max.015
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %.max.0 = select i1 %cmp1, float %load1, float %max.015
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; Currently, this test-case is not supported.
+; int multi_user_cmp_max(int* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   int max = 0;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] > max;
+;     if (c) {
+;       max = a[i];
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;  return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %max.015 = phi i32 [ 0, %entry ], [ %.max.0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %load1, %max.015
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %.max.0 = select i1 %cmp1, i32 %load1, i32 %max.015
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; Currently, this test-case is not supported.
+; int multi_user_cmp_use_store_offset(float* a, int *b, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] < 0.0f;
+;     if (c) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;    b[i+c] = any;
+;   }
+;   return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-VF4-IC2-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-VF4-IC2-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-VF4-IC2-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-VF4-IC2-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-VF1-IC2-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-VF1-IC2-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-VF1-IC2-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-VF1-IC2-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %conv4 = zext i1 %cmp1 to i32
+  %n32 = trunc i64 %n to i32
+  %add = add nuw nsw i32 %conv4, %n32
+  %idxprom5 = zext nneg i32 %add to i64
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %idxprom5
+  store i32 %conv4, ptr %arrayidx6, align 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; Not vectorised: the compare instruction has an extra user (%0) inside the loop.
+define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP3]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %0 = sext i1 %cmp1 to i64
+  %1 = add i64 %0, %indvars.iv
+  %indvars.iv.next = add nuw nsw i64 %1, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %2 = select i1 %.any.0.off0, i32 2, i32 3
+  %3 = select i1 %all.0.off0., i32 1, i32 %2
+  ret i32 %3
+}
+
+; Not vectorised: non-recurrent select instruction (%0) inside the loop.
+define i32 @multi_user_cmp_extra_select(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP2]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %0 = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %1 = select i1 %.any.0.off0, i32 2, i32 3
+  %2 = select i1 %all.0.off0., i32 1, i32 %1
+  ret i32 %2
+}

>From 53a4a2f8cf5cc69cd3edf1fd28158f42fe90e13b Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 18 Apr 2024 11:03:44 +0000
Subject: [PATCH 2/8] [LoopVectorize] LLVM fails to vectorise loops with
 multiple bool variables

This patch allows compare instructions that have multiple users, inside and
outside the loop, to be considered for vectorisation. If we can prove that
each user of the compare instruction is a recurrent reduction, a branch, or
an instruction outside the loop, then the loop is safe to vectorise.

This change allows the following loop to be vectorised:
int foo(float* a, int n) {
  _Bool any = 0;
  _Bool all = 1;
  for (int i = 0; i < n; i++) {
    if (a[i] < 0.0f) {
      any = 1;
    } else {
      all = 0;
    }
  }
  return all ? 1 : any ? 2 : 3;
}
---
 llvm/include/llvm/Analysis/IVDescriptors.h    |  20 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |  65 +-
 .../Vectorize/LoopVectorizationLegality.cpp   |  25 +
 .../test/Transforms/LoopVectorize/multicmp.ll | 881 +++++++++++++++---
 4 files changed, 860 insertions(+), 131 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 5c7b613ac48c4..f18ab500c4d9f 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -76,11 +76,11 @@ class RecurrenceDescriptor {
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
                        Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
-                       unsigned MinWidthCastToRecurTy)
+                       unsigned MinWidthCastToRecurTy, Instruction *Cmp)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
         IsSigned(Signed), IsOrdered(Ordered),
-        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
+        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), MultiCmp(Cmp) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -88,12 +88,13 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
     InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
-        : IsRecurrence(IsRecur), PatternLastInst(I),
-          RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
+        : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None),
+          ExactFPMathInst(ExactFP), Cmp(nullptr) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
+    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr,
+             Instruction *MultiCmp = nullptr)
         : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP) {}
+          ExactFPMathInst(ExactFP), Cmp(MultiCmp) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
@@ -105,6 +106,8 @@ class RecurrenceDescriptor {
 
     Instruction *getPatternInst() const { return PatternLastInst; }
 
+    Instruction *getMultiCmp() const { return Cmp; }
+
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
@@ -115,6 +118,8 @@ class RecurrenceDescriptor {
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
     Instruction *ExactFPMathInst;
+    // Multi-user compare instruction.
+    Instruction *Cmp;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -270,6 +275,8 @@ class RecurrenceDescriptor {
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
+  Instruction *getMultiCmp() const { return MultiCmp; }
+
   /// Reductions may store temporary or final result to an invariant address.
   /// If there is such a store in the loop then, after successfull run of
   /// AddReductionVar method, this field will be assigned the last met store.
@@ -300,6 +307,7 @@ class RecurrenceDescriptor {
   SmallPtrSet<Instruction *, 8> CastInsts;
   // The minimum width used by the recurrence.
   unsigned MinWidthCastToRecurrenceType;
+  Instruction *MultiCmp = nullptr;
 };
 
 /// A struct for saving information about induction variables.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 699ddf271e9e8..b0c32e4c44c47 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -256,6 +256,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
+  Instruction *MultiCMP = nullptr;
   bool IsSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
@@ -400,6 +401,8 @@ bool RecurrenceDescriptor::AddReductionVar(
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
+    if (IsASelect)
+      MultiCMP = ReduxDesc.getMultiCmp();
 
     // A conditional reduction operation must only have 2 or less uses in
     // VisitedInsts.
@@ -597,7 +600,8 @@ bool RecurrenceDescriptor::AddReductionVar(
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
                           FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
+                          MultiCMP);
   RedDes = RD;
 
   return true;
@@ -635,14 +639,59 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  SelectInst *SI = dyn_cast<SelectInst>(I);
+  Instruction *Cmp = nullptr;
+
+  if (SI) {
+    bool HasOrigPhiUser = false;
+    bool SelectNonPHIUserInLoop = false;
+    auto Blocks = Loop->getBlocksVector();
+    for (User *U : SI->users()) {
+      Instruction *Inst = dyn_cast<Instruction>(U);
+      if (!Inst)
+        continue;
+      if (Inst == OrigPhi) {
+        HasOrigPhiUser = true;
+      } else {
+        if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
+            Blocks.end())
+          SelectNonPHIUserInLoop = true;
+      }
+    }
+    Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+    if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
+      bool IsSafeCMP = true;
+      for (User *U : Cmp->users()) {
+        Instruction *UInst = dyn_cast<Instruction>(U);
+        if (!UInst)
+          continue;
+        if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
+          if (!llvm::all_of(SI1->users(), [Blocks](User *USI) {
+                Instruction *Inst1 = dyn_cast<Instruction>(USI);
+                if (!Inst1 || (std::find(Blocks.begin(), Blocks.end(),
+                                         Inst1->getParent()) == Blocks.end() ||
+                               isa<PHINode>(Inst1)))
+                  return true;
+                return false;
+              }))
+            IsSafeCMP = false;
+        }
+        if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
+            std::find(Blocks.begin(), Blocks.end(), UInst->getParent()) !=
+                Blocks.end())
+          IsSafeCMP = false;
+      }
+      if (!IsSafeCMP)
+        Cmp = nullptr;
+    }
+  }
+
   // Only match select with single use cmp condition.
-  if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
-                         m_Value())))
+  if (!Cmp && !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
+                                 m_Value(), m_Value())))
     return InstDesc(false, I);
 
-  SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
-
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
@@ -656,8 +705,10 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   if (!Loop->isLoopInvariant(NonPhi))
     return InstDesc(false, I);
 
-  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
-                                                     : RecurKind::FAnyOf);
+  return InstDesc(I,
+                  isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+                                                  : RecurKind::FAnyOf,
+                  nullptr, Cmp);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f54eebb2874ab..04758a7e8d8aa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -787,6 +787,7 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *Header = TheLoop->getHeader();
+  DenseMap<Instruction *, unsigned> MultiCmpsRed;
 
   // For each block in the loop.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -830,6 +831,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
+          Instruction *Cmp = RedDes.getMultiCmp();
+          if (Cmp) {
+            if (MultiCmpsRed.contains(Cmp))
+              MultiCmpsRed[Cmp]++;
+            else
+              MultiCmpsRed[Cmp] = 1;
+          }
           continue;
         }
 
@@ -1045,6 +1053,23 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
+  // Make sure every in-loop select user of a multi-use compare is a reduction.
+  if (MultiCmpsRed.size() > 0) {
+    auto Blocks = TheLoop->getBlocksVector();
+    for (auto const &C : MultiCmpsRed) {
+      Instruction *Cmp = C.first;
+      unsigned Counter = 0;
+      for (User *U : Cmp->users()) {
+        SelectInst *Inst = dyn_cast<SelectInst>(U);
+        if (Inst && std::find(Blocks.begin(), Blocks.end(),
+                              Inst->getParent()) != Blocks.end())
+          Counter++;
+      }
+      if (Counter != C.second)
+        return false;
+    }
+  }
+
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
index 538b62ec06a33..17c7383afd8b0 100644
--- a/llvm/test/Transforms/LoopVectorize/multicmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -20,22 +20,55 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
 ; CHECK-NEXT:    ret i32 [[TMP10]]
@@ -43,22 +76,68 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
 ; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
@@ -66,22 +145,60 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
 ; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
@@ -124,22 +241,55 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
 ; CHECK-NEXT:    ret i32 [[TMP10]]
@@ -147,22 +297,68 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
 ; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
@@ -170,22 +366,60 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP4]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[TMP5]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
 ; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
@@ -231,11 +465,95 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP31:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP32]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -251,10 +569,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK:       if.end6:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
 ; CHECK-NEXT:    ret i32 [[TMP34]]
@@ -262,11 +580,151 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-VF4-IC2:       vector.memcheck:
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-VF4-IC2-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-VF4-IC2-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-VF4-IC2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI4]]
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI2]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-VF4-IC2:       pred.store.if:
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK-VF4-IC2:       pred.store.continue:
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_STORE_IF]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
+; CHECK-VF4-IC2:       pred.store.if6:
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-VF4-IC2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
+; CHECK-VF4-IC2:       pred.store.continue7:
+; CHECK-VF4-IC2-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP21]], [[PRED_STORE_IF6]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
+; CHECK-VF4-IC2:       pred.store.if8:
+; CHECK-VF4-IC2-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 2
+; CHECK-VF4-IC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-VF4-IC2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
+; CHECK-VF4-IC2:       pred.store.continue9:
+; CHECK-VF4-IC2-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE7]] ], [ [[TMP27]], [[PRED_STORE_IF8]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP30]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
+; CHECK-VF4-IC2:       pred.store.if10:
+; CHECK-VF4-IC2-NEXT:    [[TMP31:%.*]] = add i64 [[INDEX]], 3
+; CHECK-VF4-IC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-VF4-IC2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP34:%.*]] = add nsw i32 [[TMP33]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP34]], ptr [[TMP32]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
+; CHECK-VF4-IC2:       pred.store.continue11:
+; CHECK-VF4-IC2-NEXT:    [[TMP35:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP33]], [[PRED_STORE_IF10]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP36]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
+; CHECK-VF4-IC2:       pred.store.if12:
+; CHECK-VF4-IC2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP39:%.*]] = add nsw i32 [[TMP38]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP39]], ptr [[TMP37]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
+; CHECK-VF4-IC2:       pred.store.continue13:
+; CHECK-VF4-IC2-NEXT:    [[TMP40:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE11]] ], [ [[TMP38]], [[PRED_STORE_IF12]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP41:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP41]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
+; CHECK-VF4-IC2:       pred.store.if14:
+; CHECK-VF4-IC2-NEXT:    [[TMP42:%.*]] = add i64 [[INDEX]], 5
+; CHECK-VF4-IC2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP42]]
+; CHECK-VF4-IC2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP45:%.*]] = add nsw i32 [[TMP44]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP45]], ptr [[TMP43]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
+; CHECK-VF4-IC2:       pred.store.continue15:
+; CHECK-VF4-IC2-NEXT:    [[TMP46:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE13]] ], [ [[TMP44]], [[PRED_STORE_IF14]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP47:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP47]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
+; CHECK-VF4-IC2:       pred.store.if16:
+; CHECK-VF4-IC2-NEXT:    [[TMP48:%.*]] = add i64 [[INDEX]], 6
+; CHECK-VF4-IC2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP48]]
+; CHECK-VF4-IC2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP51:%.*]] = add nsw i32 [[TMP50]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP51]], ptr [[TMP49]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
+; CHECK-VF4-IC2:       pred.store.continue17:
+; CHECK-VF4-IC2-NEXT:    [[TMP52:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE15]] ], [ [[TMP50]], [[PRED_STORE_IF16]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP53:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP53]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
+; CHECK-VF4-IC2:       pred.store.if18:
+; CHECK-VF4-IC2-NEXT:    [[TMP54:%.*]] = add i64 [[INDEX]], 7
+; CHECK-VF4-IC2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP54]]
+; CHECK-VF4-IC2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP57:%.*]] = add nsw i32 [[TMP56]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP57]], ptr [[TMP55]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE19]]
+; CHECK-VF4-IC2:       pred.store.continue19:
+; CHECK-VF4-IC2-NEXT:    [[TMP58:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE17]] ], [ [[TMP56]], [[PRED_STORE_IF18]] ]
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP11]], <4 x i1> [[TMP12]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP20]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP60]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP22:%.*]] = icmp ne <4 x i1> [[TMP9]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT23:%.*]] = select <4 x i1> [[RDX_SELECT_CMP22]], <4 x i1> [[TMP9]], <4 x i1> [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = icmp ne <4 x i1> [[RDX_SELECT23]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP24]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP61]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX26:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT25]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX26]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -282,10 +740,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2:       if.end6:
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT25]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP62:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP63:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP62]]
 ; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP63]]
@@ -293,30 +751,94 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-VF1-IC2:       vector.memcheck:
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-VF1-IC2-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-VF1-IC2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP4]], align 4, !alias.scope [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt float [[TMP6]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP8]], i1 true, i1 [[VEC_PHI4]]
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[TMP8]], i1 [[VEC_PHI2]], i1 false
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-VF1-IC2:       pred.store.if:
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = add nsw i32 [[TMP14]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[TMP15]], ptr [[TMP13]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK-VF1-IC2:       pred.store.continue:
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_STORE_IF]] ]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       pred.store.if5:
+; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = add nsw i32 [[TMP18]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[TMP19]], ptr [[TMP17]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       pred.store.continue6:
+; CHECK-VF1-IC2-NEXT:    [[TMP20:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ]
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP11]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP11]], i1 [[TMP12]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne i1 [[TMP9]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[RDX_SELECT_CMP7]], i1 [[TMP9]], i1 [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX9:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX9]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[CMP1]], i1 true, i1 [[VEC_PHI4]]
-; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[CMP1]], i1 [[VEC_PHI2]], i1 false
-; CHECK-VF1-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
 ; CHECK-VF1-IC2:       if.then3:
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
 ; CHECK-VF1-IC2-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
-; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2-NEXT:    br label [[IF_END6]]
 ; CHECK-VF1-IC2:       if.end6:
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[TMP10]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[TMP12]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP22]]
 ; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP23]]
@@ -371,23 +893,57 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP9]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP10]], i32 [[TMP11]]
@@ -396,23 +952,70 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP14]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP15]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
 ; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP16]], i32 [[TMP17]]
@@ -421,23 +1024,61 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
 ; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP13]], i32 [[TMP14]]
@@ -583,7 +1224,7 @@ exit:
 }
 
 ; Currently, this test-case is not supported.
-; int multi_user_cmp_fmax(int* a, long long n) {
+; int multi_user_cmp_max(int* a, long long n) {
 ;   _Bool any = 0;
 ;   _Bool all = 1;
 ;   int max = 0;
@@ -613,7 +1254,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
@@ -638,7 +1279,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
@@ -663,7 +1304,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
 ; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
@@ -677,7 +1318,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 entry:
   br label %for.body
 
-for.body:
+for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
@@ -687,17 +1328,21 @@ for.body:
   %cmp1 = icmp sgt i32 %load1, %max.015
   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
-  %.max.0 = select i1 %cmp1, i32 %load1, i32 %max.015
+  %.max.0 = tail call i32 @llvm.smax.i32(i32 %load1, i32 %max.015)
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %exit, label %for.body
 
-exit:
-  %0 = select i1 %.any.0.off0, i32 2, i32 3
-  %1 = select i1 %all.0.off0., i32 1, i32 %0
+exit:                                             ; preds = %for.body
+  %.any.0.off0.lcssa = phi i1 [ %.any.0.off0, %for.body ]
+  %all.0.off0..lcssa = phi i1 [ %all.0.off0., %for.body ]
+  %0 = select i1 %.any.0.off0.lcssa, i32 2, i32 3
+  %1 = select i1 %all.0.off0..lcssa, i32 1, i32 %0
   ret i32 %1
 }
 
+declare i32 @llvm.smax.i32(i32, i32)
+
 ; Currently, this test-case is not supported.
 ; int multi_user_cmp_use_store_offset(float* a, int *b, long long n) {
 ;   _Bool any = 0;

>From 97528391af0463a36f3cce67ccfc8ab861660336 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 22 Apr 2024 12:06:19 +0000
Subject: [PATCH 3/8] Resolved remarks.

---
 llvm/lib/Analysis/IVDescriptors.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b0c32e4c44c47..9ce33d006e325 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -639,9 +639,11 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  // Find the compare instruction that is associated with OrigPhi, i.e
+  // recurrent-reduction. And determine that SelectInst and CmpInst multiple
+  // instructions usage are safe to vectorise.
   SelectInst *SI = dyn_cast<SelectInst>(I);
   Instruction *Cmp = nullptr;
-
   if (SI) {
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
@@ -653,6 +655,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
+        // If we found SelectInstr usage in the loop then the reduction stops
+        // to be recurrent and it is not safe to procede further.
         if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
             Blocks.end())
           SelectNonPHIUserInLoop = true;
@@ -683,6 +687,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       }
       if (!IsSafeCMP)
         Cmp = nullptr;
+    } else {
+      Cmp = nullptr;
     }
   }
 

>From f1dfe175436d0a287c985ed10a84f65f3298b7e2 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 1 May 2024 07:00:37 +0000
Subject: [PATCH 4/8] Resolved remarks.

---
 llvm/include/llvm/Analysis/IVDescriptors.h    | 20 +++-------
 llvm/lib/Analysis/IVDescriptors.cpp           | 20 ++--------
 .../Vectorize/LoopVectorizationLegality.cpp   | 12 +++++-
 .../LoopVectorize/AArch64/select-multi-cmp.ll | 39 +++++++++++++++++++
 4 files changed, 59 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index f18ab500c4d9f..5c7b613ac48c4 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -76,11 +76,11 @@ class RecurrenceDescriptor {
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
                        Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
-                       unsigned MinWidthCastToRecurTy, Instruction *Cmp)
+                       unsigned MinWidthCastToRecurTy)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
         IsSigned(Signed), IsOrdered(Ordered),
-        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), MultiCmp(Cmp) {
+        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -88,13 +88,12 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
     InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
-        : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None),
-          ExactFPMathInst(ExactFP), Cmp(nullptr) {}
+        : IsRecurrence(IsRecur), PatternLastInst(I),
+          RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr,
-             Instruction *MultiCmp = nullptr)
+    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
         : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP), Cmp(MultiCmp) {}
+          ExactFPMathInst(ExactFP) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
@@ -106,8 +105,6 @@ class RecurrenceDescriptor {
 
     Instruction *getPatternInst() const { return PatternLastInst; }
 
-    Instruction *getMultiCmp() const { return Cmp; }
-
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
@@ -118,8 +115,6 @@ class RecurrenceDescriptor {
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
     Instruction *ExactFPMathInst;
-    // Mult-user compare instruction.
-    Instruction *Cmp;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -275,8 +270,6 @@ class RecurrenceDescriptor {
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
-  Instruction *getMultiCmp() const { return MultiCmp; }
-
   /// Reductions may store temporary or final result to an invariant address.
   /// If there is such a store in the loop then, after successfull run of
   /// AddReductionVar method, this field will be assigned the last met store.
@@ -307,7 +300,6 @@ class RecurrenceDescriptor {
   SmallPtrSet<Instruction *, 8> CastInsts;
   // The minimum width used by the recurrence.
   unsigned MinWidthCastToRecurrenceType;
-  Instruction *MultiCmp = nullptr;
 };
 
 /// A struct for saving information about induction variables.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 9ce33d006e325..6f381c46a2870 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -256,7 +256,6 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
-  Instruction *MultiCMP = nullptr;
   bool IsSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
@@ -401,8 +400,6 @@ bool RecurrenceDescriptor::AddReductionVar(
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
-    if (IsASelect)
-      MultiCMP = ReduxDesc.getMultiCmp();
 
     // A conditional reduction operation must only have 2 or less uses in
     // VisitedInsts.
@@ -600,8 +597,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
                           FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
-                          MultiCMP);
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
   RedDes = RD;
 
   return true;
@@ -639,11 +635,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
-  // Find the compare instruction that is associated with OrigPhi, i.e
-  // recurrent-reduction. And determine that SelectInst and CmpInst multiple
-  // instructions usage are safe to vectorise.
   SelectInst *SI = dyn_cast<SelectInst>(I);
   Instruction *Cmp = nullptr;
+
   if (SI) {
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
@@ -655,8 +649,6 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
-        // If we found SelectInstr usage in the loop then the reduction stops
-        // to be recurrent and it is not safe to procede further.
         if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
             Blocks.end())
           SelectNonPHIUserInLoop = true;
@@ -687,8 +679,6 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       }
       if (!IsSafeCMP)
         Cmp = nullptr;
-    } else {
-      Cmp = nullptr;
     }
   }
 
@@ -711,10 +701,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   if (!Loop->isLoopInvariant(NonPhi))
     return InstDesc(false, I);
 
-  return InstDesc(I,
-                  isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
-                                                  : RecurKind::FAnyOf,
-                  nullptr, Cmp);
+  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+                                                     : RecurKind::FAnyOf);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 04758a7e8d8aa..c5e6396857423 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -831,8 +831,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
-          Instruction *Cmp = RedDes.getMultiCmp();
-          if (Cmp) {
+          CmpInst *Cmp = nullptr;
+          for (Value *V :
+               {Phi->getIncomingValue(0), Phi->getIncomingValue(1)}) {
+            if (Instruction *SI = dyn_cast<SelectInst>(V))
+              Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+          }
+          if (Cmp && !Cmp->hasOneUse()) {
+            RecurKind Kind = RedDes.getRecurrenceKind();
+            assert((Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf) &&
+                   "Unexpected type of recurrence");
             if (MultiCmpsRed.contains(Cmp))
               MultiCmpsRed[Cmp]++;
             else
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
new file mode 100644
index 0000000000000..483240770e87b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %load1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %exitcond.not, label %exit, label %for.body
+entry:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}

>From e77247f9b4d26e6d28ee327ec29c6b7726ee53c5 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 1 May 2024 21:38:33 +0000
Subject: [PATCH 5/8] Changed multi_user_cmp_max() function to use
 llvm.smax.i32 intrinsic, replaced std::find(Blocks.begin(), Blocks.end(), ...)
 with Loop->contains(Inst->getParent()), added comments.

---
 llvm/lib/Analysis/IVDescriptors.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 6f381c46a2870..f584ef7cf9c30 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -639,9 +639,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   Instruction *Cmp = nullptr;
 
   if (SI) {
+    // Check that SelectInst is related to the this PHI reduction.
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
-    auto Blocks = Loop->getBlocksVector();
     for (User *U : SI->users()) {
       Instruction *Inst = dyn_cast<Instruction>(U);
       if (!Inst)
@@ -649,12 +649,12 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
-        if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
-            Blocks.end())
+        if (Loop->contains(Inst->getParent()))
           SelectNonPHIUserInLoop = true;
       }
     }
     Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+    // Checking the current CmpInst is safe as a recurrent reduction.
     if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
       bool IsSafeCMP = true;
       for (User *U : Cmp->users()) {
@@ -662,19 +662,17 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
         if (!UInst)
           continue;
         if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
-          if (!llvm::all_of(SI1->users(), [Blocks](User *USI) {
+          if (!llvm::all_of(SI1->users(), [Loop](User *USI) {
                 Instruction *Inst1 = dyn_cast<Instruction>(USI);
-                if (!Inst1 || (std::find(Blocks.begin(), Blocks.end(),
-                                         Inst1->getParent()) == Blocks.end() ||
-                               isa<PHINode>(Inst1)))
+                if (!Inst1 || !Loop->contains(Inst1->getParent()) ||
+                    isa<PHINode>(Inst1))
                   return true;
                 return false;
               }))
             IsSafeCMP = false;
         }
         if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
-            std::find(Blocks.begin(), Blocks.end(), UInst->getParent()) !=
-                Blocks.end())
+            Loop->contains(UInst->getParent()))
           IsSafeCMP = false;
       }
       if (!IsSafeCMP)

>From e50cd3f49f9b6fcec0d5ac75761d733f439ac96f Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 2 May 2024 15:59:22 +0000
Subject: [PATCH 6/8] Fix a compile time failure, while testing proposed
 change.

---
 .../lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index c5e6396857423..fbdfc872fe395 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -837,10 +837,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
             if (Instruction *SI = dyn_cast<SelectInst>(V))
               Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
           }
-          if (Cmp && !Cmp->hasOneUse()) {
-            RecurKind Kind = RedDes.getRecurrenceKind();
-            assert((Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf) &&
-                   "Unexpected type of recurrence");
+          RecurKind Kind = RedDes.getRecurrenceKind();
+          if (Cmp && !Cmp->hasOneUse() &&
+              (Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf)) {
             if (MultiCmpsRed.contains(Cmp))
               MultiCmpsRed[Cmp]++;
             else

>From 41359126a94f3185763f39c29a76e10364291e98 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 16 May 2024 11:19:26 +0000
Subject: [PATCH 7/8] Update tests.

---
 llvm/lib/Analysis/IVDescriptors.cpp           |  52 +---
 .../Vectorize/LoopVectorizationLegality.cpp   |  32 ---
 .../LoopVectorize/AArch64/select-costs.ll     |  39 ++-
 .../LoopVectorize/AArch64/select-multi-cmp.ll |  39 ---
 .../{multicmp.ll => select-cmp-multiuse.ll}   | 268 ++++++++++--------
 .../Transforms/LoopVectorize/select-cmp.ll    |  27 --
 6 files changed, 183 insertions(+), 274 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
 rename llvm/test/Transforms/LoopVectorize/{multicmp.ll => select-cmp-multiuse.ll} (89%)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index f584ef7cf9c30..ac6df22678434 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -635,57 +635,13 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
-  SelectInst *SI = dyn_cast<SelectInst>(I);
-  Instruction *Cmp = nullptr;
-
-  if (SI) {
-    // Check that SelectInst is related to the this PHI reduction.
-    bool HasOrigPhiUser = false;
-    bool SelectNonPHIUserInLoop = false;
-    for (User *U : SI->users()) {
-      Instruction *Inst = dyn_cast<Instruction>(U);
-      if (!Inst)
-        continue;
-      if (Inst == OrigPhi) {
-        HasOrigPhiUser = true;
-      } else {
-        if (Loop->contains(Inst->getParent()))
-          SelectNonPHIUserInLoop = true;
-      }
-    }
-    Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
-    // Checking the current CmpInst is safe as a recurrent reduction.
-    if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
-      bool IsSafeCMP = true;
-      for (User *U : Cmp->users()) {
-        Instruction *UInst = dyn_cast<Instruction>(U);
-        if (!UInst)
-          continue;
-        if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
-          if (!llvm::all_of(SI1->users(), [Loop](User *USI) {
-                Instruction *Inst1 = dyn_cast<Instruction>(USI);
-                if (!Inst1 || !Loop->contains(Inst1->getParent()) ||
-                    isa<PHINode>(Inst1))
-                  return true;
-                return false;
-              }))
-            IsSafeCMP = false;
-        }
-        if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
-            Loop->contains(UInst->getParent()))
-          IsSafeCMP = false;
-      }
-      if (!IsSafeCMP)
-        Cmp = nullptr;
-    }
-  }
-
-  // Only match select with single use cmp condition.
-  if (!Cmp && !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
-                                 m_Value(), m_Value())))
+  if (!match(I,
+             m_Select(m_Cmp(Pred, m_Value(), m_Value()), m_Value(), m_Value())))
     return InstDesc(false, I);
 
+  SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
+
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index fbdfc872fe395..f54eebb2874ab 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -787,7 +787,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *Header = TheLoop->getHeader();
-  DenseMap<Instruction *, unsigned> MultiCmpsRed;
 
   // For each block in the loop.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -831,20 +830,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
-          CmpInst *Cmp = nullptr;
-          for (Value *V :
-               {Phi->getIncomingValue(0), Phi->getIncomingValue(1)}) {
-            if (Instruction *SI = dyn_cast<SelectInst>(V))
-              Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
-          }
-          RecurKind Kind = RedDes.getRecurrenceKind();
-          if (Cmp && !Cmp->hasOneUse() &&
-              (Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf)) {
-            if (MultiCmpsRed.contains(Cmp))
-              MultiCmpsRed[Cmp]++;
-            else
-              MultiCmpsRed[Cmp] = 1;
-          }
           continue;
         }
 
@@ -1060,23 +1045,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
-  // Make sure that all compare instruction users are recurrent if in loop's BB.
-  if (MultiCmpsRed.size() > 0) {
-    auto Blocks = TheLoop->getBlocksVector();
-    for (auto const &C : MultiCmpsRed) {
-      Instruction *Cmp = C.first;
-      unsigned Counter = 0;
-      for (User *U : Cmp->users()) {
-        SelectInst *Inst = dyn_cast<SelectInst>(U);
-        if (Inst && std::find(Blocks.begin(), Blocks.end(),
-                              Inst->getParent()) != Blocks.end())
-          Counter++;
-      }
-      if (Counter != C.second)
-        return false;
-    }
-  }
-
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
index 1cde8b9bad6fc..25342040aad36 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
@@ -13,10 +13,6 @@ define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
 
-; CHECK-LABEL: define void @selects_1(
-; CHECK:       vector.body:
-; CHECK:         select <4 x i1>
-
 entry:
   %cmp26 = icmp sgt i32 %N, 0
   br i1 %cmp26, label %for.body.preheader, label %for.cond.cleanup
@@ -47,3 +43,38 @@ for.cond.cleanup.loopexit:                        ; preds = %for.body
 for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
   ret void
 }
+
+define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
+; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+entry:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; CHECK-LABEL: define void @selects_1(
+; CHECK:       vector.body:
+; CHECK:         select <4 x i1>
+
+; CHECK-LABEL: define i32 @multi_user_cmp(
+; CHECK:       vector.body:
+; CHECK:         %index = phi i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
deleted file mode 100644
index 483240770e87b..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-unknown-linux-gnu"
-
-define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %load1 = load float, ptr %arrayidx, align 4
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %exitcond.not, label %exit, label %for.body
-entry:
-  %wide.trip.count = zext nneg i32 %n to i64
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
-  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
-  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
-  %load1 = load float, ptr %arrayidx, align 4
-  %cmp1 = fcmp olt float %load1, 0.000000e+00
-  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
-  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  %0 = select i1 %.any.0.off0, i32 2, i32 3
-  %1 = select i1 %all.0.off0., i32 1, i32 %0
-  ret i32 %1
-}
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
similarity index 89%
rename from llvm/test/Transforms/LoopVectorize/multicmp.ll
rename to llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index 17c7383afd8b0..e826d62dcb29d 100644
--- a/llvm/test/Transforms/LoopVectorize/multicmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -28,25 +28,26 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP12]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP8]]
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP11]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -84,10 +85,10 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -98,24 +99,24 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP17]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP19]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP13]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP20]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP19]], [[TMP17]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP14]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP18]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -153,10 +154,10 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -165,18 +166,22 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
@@ -249,25 +254,26 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP12]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP8]]
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP11]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -305,10 +311,10 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -319,24 +325,24 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP17]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP19]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP13]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP20]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP19]], [[TMP17]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP14]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP18]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -374,10 +380,10 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -386,18 +392,22 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP4]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[TMP5]], 0
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
@@ -481,15 +491,16 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP9:%.*]], [[PRED_STORE_CONTINUE8]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP5]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -537,12 +548,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP31:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP36:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT:    [[TMP31:%.*]] = freeze i1 [[TMP36]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP32]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP35:%.*]] = freeze i1 [[TMP32]]
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP35]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -596,10 +607,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE19]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP64:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
@@ -610,10 +621,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI4]]
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI2]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP64]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP66]] = or <4 x i1> [[VEC_PHI6]], [[TMP8]]
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = or <4 x i1> [[VEC_PHI4]], [[TMP12]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if:
@@ -704,16 +717,14 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP11]], <4 x i1> [[TMP12]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = or <4 x i1> [[TMP10]], [[TMP9]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP20]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP60]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP22:%.*]] = icmp ne <4 x i1> [[TMP9]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT23:%.*]] = select <4 x i1> [[RDX_SELECT_CMP22]], <4 x i1> [[TMP9]], <4 x i1> [[TMP10]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = icmp ne <4 x i1> [[RDX_SELECT23]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP67:%.*]] = freeze i1 [[TMP60]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP67]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = or <4 x i1> [[TMP66]], [[TMP64]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP24]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP61]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP65:%.*]] = freeze i1 [[TMP61]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP65]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -767,10 +778,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI6:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
@@ -779,10 +790,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP4]], align 4, !alias.scope [[META6]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt float [[TMP6]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP8]], i1 true, i1 [[VEC_PHI4]]
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[TMP8]], i1 [[VEC_PHI2]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP26]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP27]] = or i1 [[VEC_PHI6]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = xor i1 [[TMP8]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = or i1 [[VEC_PHI4]], [[TMP12]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF1-IC2:       pred.store.if:
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
@@ -805,10 +818,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP11]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP11]], i1 [[TMP12]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne i1 [[TMP9]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[RDX_SELECT_CMP7]], i1 [[TMP9]], i1 [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP10]], [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[TMP24:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP24]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX7:%.*]] = or i1 [[TMP27]], [[TMP26]]
+; CHECK-VF1-IC2-NEXT:    [[TMP25:%.*]] = freeze i1 [[BIN_RDX7]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[TMP25]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
@@ -901,26 +916,27 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP13]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP9]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP14:%.*]] = freeze i1 [[TMP9]]
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP14]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -960,10 +976,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -974,25 +990,25 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP20]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP21]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP14]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = freeze i1 [[TMP14]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP22]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP21]], [[TMP20]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP15]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP19]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -1032,10 +1048,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -1044,18 +1060,22 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 993b56a05207b..da0f7283d80d5 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -272,33 +272,6 @@ exit:                                     ; preds = %for.body
 }
 
 
-; We don't support select/cmp reduction patterns where there is more than one
-; use of the icmp/fcmp.
-define i32 @select_const_i32_from_icmp_mul_use(ptr nocapture readonly %v1, ptr %v2, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_icmp_mul_use
-; CHECK-NOT: vector.body
-entry:
-  br label %for.body
-
-for.body:                                      ; preds = %entry, %for.body
-  %0 = phi i64 [ 0, %entry ], [ %8, %for.body ]
-  %1 = phi i32 [ 3, %entry ], [ %6, %for.body ]
-  %2 = phi i32 [ 0, %entry ], [ %7, %for.body ]
-  %3 = getelementptr inbounds i32, ptr %v1, i64 %0
-  %4 = load i32, ptr %3, align 4
-  %5 = icmp eq i32 %4, 3
-  %6 = select i1 %5, i32 %1, i32 7
-  %7 = zext i1 %5 to i32
-  %8 = add nuw nsw i64 %0, 1
-  %9 = icmp eq i64 %8, %n
-  br i1 %9, label %exit, label %for.body
-
-exit:                                     ; preds = %for.body
-  store i32 %7, ptr %v2, align 4
-  ret i32 %6
-}
-
-
 ; We don't support selecting loop-variant values.
 define i32 @select_variant_i32_from_icmp(ptr nocapture readonly %v1, ptr nocapture readonly %v2, i64 %n) {
 ; CHECK-LABEL: @select_variant_i32_from_icmp

>From 327f94dfeb948fc1bfae3acd537ad1babee17da5 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 26 Jun 2024 11:45:31 +0000
Subject: [PATCH 8/8] Further test updates.

---
 .../LoopVectorize/AArch64/select-costs.ll     |   31 +-
 .../LoopVectorize/select-cmp-multiuse.ll      | 1343 +++++++++--------
 2 files changed, 710 insertions(+), 664 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
index 25342040aad36..2bcc93127da1e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
@@ -1,10 +1,11 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios5.0.0"
 
 define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
+; CHECK: LV: Checking a loop in 'selects_1'
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond = select i1 %cmp1, i32 10, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
@@ -12,13 +13,14 @@ define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond = select i1 %cmp1, i32 10, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
+; CHECK: LV: Selecting VF: 4.
 
 entry:
   %cmp26 = icmp sgt i32 %N, 0
   br i1 %cmp26, label %for.body.preheader, label %for.cond.cleanup
 
 for.body.preheader:                               ; preds = %entry
-  %wide.trip.count = zext i32 %N to i64
+  %n = zext i32 %N to i64
   br label %for.body
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
@@ -34,7 +36,7 @@ for.body:                                         ; preds = %for.body.preheader,
   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
   store i32 %cond11, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 
 for.cond.cleanup.loopexit:                        ; preds = %for.body
@@ -44,37 +46,30 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
   ret void
 }
 
-define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
+; CHECK: LV: Checking a loop in 'multi_user_cmp'
 ; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
-; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %all.off = select i1 %cmp1, i1 %all.off.next, i1 false
+; CHECK: LV: Selecting VF: 16.
 entry:
-  %wide.trip.count = zext nneg i32 %n to i64
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %all.off.next = phi i1 [ true, %entry ], [ %all.off, %for.body ]
   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
   %load1 = load float, ptr %arrayidx, align 4
   %cmp1 = fcmp olt float %load1, 0.000000e+00
   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
-  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %all.off = select i1 %cmp1, i1 %all.off.next, i1 false
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %exit, label %for.body
 
 exit:
   %0 = select i1 %.any.0.off0, i32 2, i32 3
-  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  %1 = select i1 %all.off, i32 1, i32 %0
   ret i32 %1
 }
-
-; CHECK-LABEL: define void @selects_1(
-; CHECK:       vector.body:
-; CHECK:         select <4 x i1>
-
-; CHECK-LABEL: define i32 @multi_user_cmp(
-; CHECK:       vector.body:
-; CHECK:         %index = phi i64
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index e826d62dcb29d..8983c80bf3ef4 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC2
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1-IC2
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC1 --check-prefix=CHECK
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC2 --check-prefix=CHECK
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1-IC2 --check-prefix=CHECK
 
 
 ; int multi_user_cmp(float* a, long long n) {
@@ -17,62 +17,62 @@
 ;   return all ? 1 : any ? 2 : 3;
 ; }
 define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
-; CHECK-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP12]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
-; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP8]]
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP11]], i1 true, i1 false
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
-; CHECK-NEXT:    ret i32 [[TMP10]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC1:       vector.ph:
+; CHECK-VF4-IC1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-VF4-IC1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC1:       vector.body:
+; CHECK-VF4-IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-VF4-IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-VF4-IC1-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC1-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]]
+; CHECK-VF4-IC1-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC1-NEXT:    [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]]
+; CHECK-VF4-IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4-IC1-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF4-IC1:       middle.block:
+; CHECK-VF4-IC1-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-VF4-IC1-NEXT:    [[TMP9:%.*]] = freeze i1 [[TMP8]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true
+; CHECK-VF4-IC1-NEXT:    [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-VF4-IC1-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT2:%.*]] = select i1 [[TMP11]], i1 true, i1 false
+; CHECK-VF4-IC1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC1:       scalar.ph:
+; CHECK-VF4-IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP12:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP13:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP12]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP13]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -85,10 +85,10 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -99,35 +99,35 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP17]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP19]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
-; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP13]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP20]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP19]], [[TMP17]]
-; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP14]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP18]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX:%.*]] = or <4 x i1> [[TMP13]], [[TMP12]]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX]])
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP16]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]])
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP17]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP18]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
 ; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX7:%.*]] = phi i1 [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX7]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -137,11 +137,11 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
-; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP19]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP20]]
 ;
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -154,10 +154,10 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -166,28 +166,28 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
-; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP7]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
 ; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = or i1 [[VEC_PHI]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP13]] = or i1 [[VEC_PHI1]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP12]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
 ; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -204,9 +204,9 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2:       exit:
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
-; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
+; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP18]]
 ;
 entry:
   br label %for.body
@@ -243,62 +243,62 @@ exit:
 ;  return all ? 1 : any ? 2 : 3;
 ;}
 define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_int(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
-; CHECK-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP12]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
-; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP8]]
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP11]], i1 true, i1 false
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
-; CHECK-NEXT:    ret i32 [[TMP10]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC1:       vector.ph:
+; CHECK-VF4-IC1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-VF4-IC1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC1:       vector.body:
+; CHECK-VF4-IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-VF4-IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-VF4-IC1-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC1-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]]
+; CHECK-VF4-IC1-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC1-NEXT:    [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]]
+; CHECK-VF4-IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4-IC1-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF4-IC1:       middle.block:
+; CHECK-VF4-IC1-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-VF4-IC1-NEXT:    [[TMP9:%.*]] = freeze i1 [[TMP8]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true
+; CHECK-VF4-IC1-NEXT:    [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-VF4-IC1-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP10]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT2:%.*]] = select i1 [[TMP11]], i1 true, i1 false
+; CHECK-VF4-IC1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC1:       scalar.ph:
+; CHECK-VF4-IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP12:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP13:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP12]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP13]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -311,10 +311,10 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -325,35 +325,35 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP17]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP19]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
-; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP13]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP20]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP19]], [[TMP17]]
-; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP14]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP18]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX:%.*]] = or <4 x i1> [[TMP13]], [[TMP12]]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX]])
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP16]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]])
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP17]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP18]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
 ; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX7:%.*]] = phi i1 [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX7]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
@@ -363,11 +363,11 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
-; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP19]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP20]]
 ;
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -380,10 +380,10 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -392,28 +392,28 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP4]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[TMP5]], 0
-; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
-; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP7]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
 ; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = or i1 [[VEC_PHI]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP13]] = or i1 [[VEC_PHI1]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP12]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
 ; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -430,9 +430,9 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2:       exit:
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
-; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
+; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP18]]
 ;
 entry:
   br label %for.body
@@ -472,121 +472,121 @@ exit:
 ;  return all ? 1 : any ? 2 : 3;
 ; }
 define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
-; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
-; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
-; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
-; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
-; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
-; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP9:%.*]], [[PRED_STORE_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[RDX_SELECT_CMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP5]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
-; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; CHECK:       pred.store.if:
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
-; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
-; CHECK:       pred.store.continue:
-; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
-; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; CHECK:       pred.store.if3:
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
-; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
-; CHECK:       pred.store.continue4:
-; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
-; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
-; CHECK:       pred.store.if5:
-; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
-; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
-; CHECK:       pred.store.continue6:
-; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
-; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
-; CHECK:       pred.store.if7:
-; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
-; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
-; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
-; CHECK:       pred.store.continue8:
-; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP36:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
-; CHECK-NEXT:    [[TMP31:%.*]] = freeze i1 [[TMP36]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 false, i1 true
-; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-NEXT:    [[TMP35:%.*]] = freeze i1 [[TMP32]]
-; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP35]], i1 true, i1 false
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
-; CHECK:       if.then3:
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
-; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
-; CHECK-NEXT:    br label [[IF_END6]]
-; CHECK:       if.end6:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
-; CHECK-NEXT:    ret i32 [[TMP34]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-VF4-IC1:       vector.memcheck:
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-VF4-IC1-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-VF4-IC1-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-VF4-IC1-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC1:       vector.ph:
+; CHECK-VF4-IC1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-VF4-IC1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC1:       vector.body:
+; CHECK-VF4-IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-VF4-IC1-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC1-NEXT:    [[TMP5]] = or <4 x i1> [[VEC_PHI2]], [[TMP4]]
+; CHECK-VF4-IC1-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC1-NEXT:    [[TMP7]] = or <4 x i1> [[VEC_PHI]], [[TMP6]]
+; CHECK-VF4-IC1-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-VF4-IC1:       pred.store.if:
+; CHECK-VF4-IC1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP10]], 1
+; CHECK-VF4-IC1-NEXT:    store i32 [[TMP11]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK-VF4-IC1:       pred.store.continue:
+; CHECK-VF4-IC1-NEXT:    [[TMP12:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP10]], [[PRED_STORE_IF]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK-VF4-IC1:       pred.store.if3:
+; CHECK-VF4-IC1-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF4-IC1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
+; CHECK-VF4-IC1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 1
+; CHECK-VF4-IC1-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK-VF4-IC1:       pred.store.continue4:
+; CHECK-VF4-IC1-NEXT:    [[TMP18:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP16]], [[PRED_STORE_IF3]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP19]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK-VF4-IC1:       pred.store.if5:
+; CHECK-VF4-IC1-NEXT:    [[TMP20:%.*]] = add i64 [[INDEX]], 2
+; CHECK-VF4-IC1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP20]]
+; CHECK-VF4-IC1-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    [[TMP23:%.*]] = add nsw i32 [[TMP22]], 1
+; CHECK-VF4-IC1-NEXT:    store i32 [[TMP23]], ptr [[TMP21]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF4-IC1:       pred.store.continue6:
+; CHECK-VF4-IC1-NEXT:    [[TMP24:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP22]], [[PRED_STORE_IF5]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK-VF4-IC1:       pred.store.if7:
+; CHECK-VF4-IC1-NEXT:    [[TMP26:%.*]] = add i64 [[INDEX]], 3
+; CHECK-VF4-IC1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP26]]
+; CHECK-VF4-IC1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    [[TMP29:%.*]] = add nsw i32 [[TMP28]], 1
+; CHECK-VF4-IC1-NEXT:    store i32 [[TMP29]], ptr [[TMP27]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC1-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK-VF4-IC1:       pred.store.continue8:
+; CHECK-VF4-IC1-NEXT:    [[TMP30:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP28]], [[PRED_STORE_IF7]] ]
+; CHECK-VF4-IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4-IC1-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF4-IC1:       middle.block:
+; CHECK-VF4-IC1-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-VF4-IC1-NEXT:    [[TMP33:%.*]] = freeze i1 [[TMP32]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP33]], i1 false, i1 true
+; CHECK-VF4-IC1-NEXT:    [[TMP34:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-VF4-IC1-NEXT:    [[TMP35:%.*]] = freeze i1 [[TMP34]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT9:%.*]] = select i1 [[TMP35]], i1 true, i1 false
+; CHECK-VF4-IC1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC1:       scalar.ph:
+; CHECK-VF4-IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ], [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX10]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK-VF4-IC1:       if.then3:
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-VF4-IC1-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-VF4-IC1-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-VF4-IC1-NEXT:    br label [[IF_END6]]
+; CHECK-VF4-IC1:       if.end6:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP36:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP37:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP36]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP37]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
@@ -607,10 +607,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP64:%.*]], [[PRED_STORE_CONTINUE19]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
@@ -621,121 +621,121 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP64]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
-; CHECK-VF4-IC2-NEXT:    [[TMP66]] = or <4 x i1> [[VEC_PHI6]], [[TMP8]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = or <4 x i1> [[VEC_PHI4]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = or <4 x i1> [[VEC_PHI4]], [[TMP12]]
-; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP13]] = or <4 x i1> [[VEC_PHI]], [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[TMP14]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP15]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if:
-; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
-; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = add nsw i32 [[TMP17]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP18]], ptr [[TMP16]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK-VF4-IC2:       pred.store.continue:
-; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_STORE_IF]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP17]], [[PRED_STORE_IF]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if6:
-; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
-; CHECK-VF4-IC2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
+; CHECK-VF4-IC2-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP24:%.*]] = add nsw i32 [[TMP23]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP24]], ptr [[TMP22]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; CHECK-VF4-IC2:       pred.store.continue7:
-; CHECK-VF4-IC2-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP21]], [[PRED_STORE_IF6]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP25:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP23]], [[PRED_STORE_IF6]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if8:
-; CHECK-VF4-IC2-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 2
-; CHECK-VF4-IC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
-; CHECK-VF4-IC2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX]], 2
+; CHECK-VF4-IC2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP27]]
+; CHECK-VF4-IC2-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP30:%.*]] = add nsw i32 [[TMP29]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP30]], ptr [[TMP28]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
 ; CHECK-VF4-IC2:       pred.store.continue9:
-; CHECK-VF4-IC2-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE7]] ], [ [[TMP27]], [[PRED_STORE_IF8]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP30]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP31:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE7]] ], [ [[TMP29]], [[PRED_STORE_IF8]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP32]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if10:
-; CHECK-VF4-IC2-NEXT:    [[TMP31:%.*]] = add i64 [[INDEX]], 3
-; CHECK-VF4-IC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
-; CHECK-VF4-IC2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP34:%.*]] = add nsw i32 [[TMP33]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP34]], ptr [[TMP32]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP33:%.*]] = add i64 [[INDEX]], 3
+; CHECK-VF4-IC2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP33]]
+; CHECK-VF4-IC2-NEXT:    [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP36:%.*]] = add nsw i32 [[TMP35]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP36]], ptr [[TMP34]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
 ; CHECK-VF4-IC2:       pred.store.continue11:
-; CHECK-VF4-IC2-NEXT:    [[TMP35:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP33]], [[PRED_STORE_IF10]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP36]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP37:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP35]], [[PRED_STORE_IF10]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP38:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP38]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if12:
-; CHECK-VF4-IC2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP39:%.*]] = add nsw i32 [[TMP38]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP39]], ptr [[TMP37]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP41:%.*]] = add nsw i32 [[TMP40]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP41]], ptr [[TMP39]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
 ; CHECK-VF4-IC2:       pred.store.continue13:
-; CHECK-VF4-IC2-NEXT:    [[TMP40:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE11]] ], [ [[TMP38]], [[PRED_STORE_IF12]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP41:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP41]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP42:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE11]] ], [ [[TMP40]], [[PRED_STORE_IF12]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP43:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP43]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if14:
-; CHECK-VF4-IC2-NEXT:    [[TMP42:%.*]] = add i64 [[INDEX]], 5
-; CHECK-VF4-IC2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP42]]
-; CHECK-VF4-IC2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP45:%.*]] = add nsw i32 [[TMP44]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP45]], ptr [[TMP43]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP44:%.*]] = add i64 [[INDEX]], 5
+; CHECK-VF4-IC2-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP44]]
+; CHECK-VF4-IC2-NEXT:    [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP47:%.*]] = add nsw i32 [[TMP46]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP47]], ptr [[TMP45]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
 ; CHECK-VF4-IC2:       pred.store.continue15:
-; CHECK-VF4-IC2-NEXT:    [[TMP46:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE13]] ], [ [[TMP44]], [[PRED_STORE_IF14]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP47:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP47]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
+; CHECK-VF4-IC2-NEXT:    [[TMP48:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE13]] ], [ [[TMP46]], [[PRED_STORE_IF14]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP49:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP49]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if16:
-; CHECK-VF4-IC2-NEXT:    [[TMP48:%.*]] = add i64 [[INDEX]], 6
-; CHECK-VF4-IC2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP48]]
-; CHECK-VF4-IC2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP51:%.*]] = add nsw i32 [[TMP50]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP51]], ptr [[TMP49]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP50:%.*]] = add i64 [[INDEX]], 6
+; CHECK-VF4-IC2-NEXT:    [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP50]]
+; CHECK-VF4-IC2-NEXT:    [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP53:%.*]] = add nsw i32 [[TMP52]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP53]], ptr [[TMP51]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
 ; CHECK-VF4-IC2:       pred.store.continue17:
-; CHECK-VF4-IC2-NEXT:    [[TMP52:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE15]] ], [ [[TMP50]], [[PRED_STORE_IF16]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP53:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP53]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
+; CHECK-VF4-IC2-NEXT:    [[TMP54:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE15]] ], [ [[TMP52]], [[PRED_STORE_IF16]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP55:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP55]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
 ; CHECK-VF4-IC2:       pred.store.if18:
-; CHECK-VF4-IC2-NEXT:    [[TMP54:%.*]] = add i64 [[INDEX]], 7
-; CHECK-VF4-IC2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP54]]
-; CHECK-VF4-IC2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP57:%.*]] = add nsw i32 [[TMP56]], 1
-; CHECK-VF4-IC2-NEXT:    store i32 [[TMP57]], ptr [[TMP55]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP56:%.*]] = add i64 [[INDEX]], 7
+; CHECK-VF4-IC2-NEXT:    [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP56]]
+; CHECK-VF4-IC2-NEXT:    [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP59:%.*]] = add nsw i32 [[TMP58]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP59]], ptr [[TMP57]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE19]]
 ; CHECK-VF4-IC2:       pred.store.continue19:
-; CHECK-VF4-IC2-NEXT:    [[TMP58:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE17]] ], [ [[TMP56]], [[PRED_STORE_IF18]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE17]] ], [ [[TMP58]], [[PRED_STORE_IF18]] ]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-VF4-IC2-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP61]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = or <4 x i1> [[TMP10]], [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP20]])
-; CHECK-VF4-IC2-NEXT:    [[TMP67:%.*]] = freeze i1 [[TMP60]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP67]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = or <4 x i1> [[TMP66]], [[TMP64]]
-; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP24]])
-; CHECK-VF4-IC2-NEXT:    [[TMP65:%.*]] = freeze i1 [[TMP61]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP65]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX:%.*]] = or <4 x i1> [[TMP14]], [[TMP13]]
+; CHECK-VF4-IC2-NEXT:    [[TMP62:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX]])
+; CHECK-VF4-IC2-NEXT:    [[TMP63:%.*]] = freeze i1 [[TMP62]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP63]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX20:%.*]] = or <4 x i1> [[TMP10]], [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[TMP64:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX20]])
+; CHECK-VF4-IC2-NEXT:    [[TMP65:%.*]] = freeze i1 [[TMP64]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP65]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
 ; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX26:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT25]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX22:%.*]] = phi i1 [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ], [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX26]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX22]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -753,11 +753,11 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT25]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP62:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF4-IC2-NEXT:    [[TMP63:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP62]]
-; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP63]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP66:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP67:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP66]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP67]]
 ;
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
@@ -778,10 +778,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI6:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
@@ -790,46 +790,46 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP4]], align 4, !alias.scope [[META6]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt float [[TMP6]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP26]] = or i1 [[VEC_PHI5]], [[TMP7]]
-; CHECK-VF1-IC2-NEXT:    [[TMP27]] = or i1 [[VEC_PHI6]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = or i1 [[VEC_PHI4]], [[TMP8]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = xor i1 [[TMP8]], true
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = or i1 [[VEC_PHI4]], [[TMP12]]
+; CHECK-VF1-IC2-NEXT:    [[TMP13]] = or i1 [[VEC_PHI]], [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[TMP14]] = or i1 [[VEC_PHI2]], [[TMP12]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF1-IC2:       pred.store.if:
-; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
-; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
-; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = add nsw i32 [[TMP14]], 1
-; CHECK-VF1-IC2-NEXT:    store i32 [[TMP15]], ptr [[TMP13]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = add nsw i32 [[TMP16]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
 ; CHECK-VF1-IC2:       pred.store.continue:
-; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_STORE_IF]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP16]], [[PRED_STORE_IF]] ]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
 ; CHECK-VF1-IC2:       pred.store.if5:
-; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META9]], !noalias [[META6]]
-; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = add nsw i32 [[TMP18]], 1
-; CHECK-VF1-IC2-NEXT:    store i32 [[TMP19]], ptr [[TMP17]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP21:%.*]] = add nsw i32 [[TMP20]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[TMP21]], ptr [[TMP19]], align 4, !alias.scope [[META9]], !noalias [[META6]]
 ; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK-VF1-IC2:       pred.store.continue6:
-; CHECK-VF1-IC2-NEXT:    [[TMP20:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP20]], [[PRED_STORE_IF5]] ]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-VF1-IC2-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP10]], [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP14]], [[TMP13]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP24:%.*]] = freeze i1 [[BIN_RDX]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP24]], i1 false, i1 true
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX7:%.*]] = or i1 [[TMP27]], [[TMP26]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX7:%.*]] = or i1 [[TMP10]], [[TMP9]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP25:%.*]] = freeze i1 [[BIN_RDX7]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[TMP25]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
 ; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX9:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX9:%.*]] = phi i1 [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ], [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
@@ -854,9 +854,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2:       exit:
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP22]]
-; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP23]]
+; CHECK-VF1-IC2-NEXT:    [[TMP26:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP27:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP26]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP27]]
 ;
 entry:
   br label %for.body
@@ -905,65 +905,65 @@ exit:
 ;   return all ? c : any ? 2 : 3;
 ; }
 define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; CHECK-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
-; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP13]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i1 false, i1 true
-; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[TMP14:%.*]] = freeze i1 [[TMP9]]
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP14]], i1 true, i1 false
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
-; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP10]], i32 [[TMP11]]
-; CHECK-NEXT:    ret i32 [[TMP12]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC1:       vector.ph:
+; CHECK-VF4-IC1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-VF4-IC1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-VF4-IC1-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC1:       vector.body:
+; CHECK-VF4-IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-VF4-IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-VF4-IC1-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC1-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]]
+; CHECK-VF4-IC1-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC1-NEXT:    [[TMP6]] = or <4 x i1> [[VEC_PHI]], [[TMP5]]
+; CHECK-VF4-IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4-IC1-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF4-IC1:       middle.block:
+; CHECK-VF4-IC1-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-VF4-IC1-NEXT:    [[TMP10:%.*]] = freeze i1 [[TMP9]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP10]], i1 false, i1 true
+; CHECK-VF4-IC1-NEXT:    [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-VF4-IC1-NEXT:    [[TMP12:%.*]] = freeze i1 [[TMP11]]
+; CHECK-VF4-IC1-NEXT:    [[RDX_SELECT2:%.*]] = select i1 [[TMP12]], i1 true, i1 false
+; CHECK-VF4-IC1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC1:       scalar.ph:
+; CHECK-VF4-IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF4-IC1-NEXT:    [[TMP14:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP15:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP13]], i32 [[TMP14]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP15]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -976,10 +976,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -990,36 +990,36 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP20]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
-; CHECK-VF4-IC2-NEXT:    [[TMP21]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[TMP12]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP13]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
-; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = freeze i1 [[TMP14]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP22]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP21]], [[TMP20]]
-; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[TMP15]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP19]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX:%.*]] = or <4 x i1> [[TMP13]], [[TMP12]]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX]])
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = freeze i1 [[TMP16]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP17]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]])
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[TMP18]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP19]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
 ; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX7:%.*]] = phi i1 [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX7]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -1029,13 +1029,13 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
-; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP16]], i32 [[TMP17]]
-; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP18]]
+; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF4-IC2-NEXT:    [[TMP21:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP20]], i32 [[TMP21]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP22]]
 ;
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -1048,10 +1048,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -1060,28 +1060,28 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
-; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP7]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
 ; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = or i1 [[VEC_PHI]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP13]] = or i1 [[VEC_PHI1]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
-; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[BIN_RDX]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i1 false, i1 true
-; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP12]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP9]], [[TMP8]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
 ; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
 ; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
 ; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -1099,10 +1099,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
-; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
-; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP13]], i32 [[TMP14]]
-; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP15]]
+; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP17]], i32 [[TMP18]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP19]]
 ;
 entry:
   br label %for.body
@@ -1144,30 +1144,30 @@ exit:
 ;  return all ? 1 : any ? 2 : 3;
 ; }
 define i32 @multi_user_cmp_fmax(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_fmax(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP1]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_fmax(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -1260,30 +1260,30 @@ exit:
 ;  return all ? 1 : any ? 2 : 3;
 ; }
 define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_max(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP1]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_max(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -1379,34 +1379,34 @@ declare i32 @llvm.smax.i32(i32, i32)
 ;   return all ? 1 : any ? 2 : 3;
 ; }
 define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_use_store_offset(
-; CHECK-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
-; CHECK-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
-; CHECK-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-VF4-IC1-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-VF4-IC1-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-VF4-IC1-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-VF4-IC1-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP1]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_use_store_offset(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
@@ -1496,30 +1496,30 @@ exit:
 
 ; Not vectorising, compare instruction user %0 inside the loop
 define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_no_vectorise(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP3]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_no_vectorise(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -1597,29 +1597,29 @@ exit:
 
 ; Not vectorising, non recurrent select instrction %0 inside the loop
 define i32 @multi_user_cmp_extra_select(ptr readonly %a, i64 noundef %n) {
-; CHECK-LABEL: define i32 @multi_user_cmp_extra_select(
-; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
-; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
-; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-VF4-IC1-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC1-NEXT:  entry:
+; CHECK-VF4-IC1-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC1:       for.body:
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC1-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC1-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC1-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC1:       exit:
+; CHECK-VF4-IC1-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC1-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC1-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-VF4-IC1-NEXT:    ret i32 [[TMP2]]
 ;
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_extra_select(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
@@ -1691,3 +1691,54 @@ exit:
   %2 = select i1 %all.0.off0., i32 1, i32 %1
   ret i32 %2
 }
+;.
+; CHECK-VF4-IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-VF4-IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-VF4-IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-VF4-IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK-VF4-IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-VF4-IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-VF4-IC1: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK-VF4-IC1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK-VF4-IC1: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK-VF4-IC1: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK-VF4-IC1: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK-VF4-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
+; CHECK-VF4-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]}
+; CHECK-VF4-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
+; CHECK-VF4-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
+;.
+; CHECK-VF4-IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-VF4-IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-VF4-IC2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-VF4-IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK-VF4-IC2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-VF4-IC2: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-VF4-IC2: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK-VF4-IC2: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK-VF4-IC2: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK-VF4-IC2: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK-VF4-IC2: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK-VF4-IC2: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
+; CHECK-VF4-IC2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]}
+; CHECK-VF4-IC2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
+; CHECK-VF4-IC2: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
+;.
+; CHECK-VF1-IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-VF1-IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-VF1-IC2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-VF1-IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK-VF1-IC2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-VF1-IC2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK-VF1-IC2: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK-VF1-IC2: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK-VF1-IC2: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK-VF1-IC2: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK-VF1-IC2: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK-VF1-IC2: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
+; CHECK-VF1-IC2: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]}
+; CHECK-VF1-IC2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
+; CHECK-VF1-IC2: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]]}
+;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}



More information about the llvm-commits mailing list