[llvm] [LoopVectorize] LLVM fails to vectorise loops with multi-bool variables (PR #89226)

Dinar Temirbulatov via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 26 04:46:15 PDT 2024


https://github.com/dtemirbulatov updated https://github.com/llvm/llvm-project/pull/89226

>From 3f2f68c21aa629bea70b8f3e2e5073658dc87ea3 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Fri, 3 May 2024 13:29:13 +0000
Subject: [PATCH 1/8] Original version of
 llvm/test/Transforms/LoopVectorize/multicmp.ll

---
 .../test/Transforms/LoopVectorize/multicmp.ll | 1028 +++++++++++++++++
 1 file changed, 1028 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/multicmp.ll

diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
new file mode 100644
index 0000000000000..538b62ec06a33
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -0,0 +1,1028 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC2
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1-IC2
+
+
+; int multi_user_cmp(float* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   for (long long i = 0; i < n; i++) {
+;     if (a[i] < 0.0f) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;   return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    ret i32 [[TMP10]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+;int multi_user_cmp_int(int* a, long long n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  for (long long i = 0; i < n; i++) {
+;    if (a[i] < 0) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
+define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    ret i32 [[TMP10]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %load1, 0
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; int multi_user_cmp_branch_use(float* a, int *b, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] < 0.0f;
+;     if (c) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;     if (c)
+;       b[i]++;
+;   }
+;  return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[IF_END6]]
+; CHECK:       if.end6:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
+; CHECK-NEXT:    ret i32 [[TMP34]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK-VF4-IC2:       if.then3:
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-VF4-IC2-NEXT:    br label [[IF_END6]]
+; CHECK-VF4-IC2:       if.end6:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP62:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP63:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP62]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP63]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[CMP1]], i1 true, i1 [[VEC_PHI4]]
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[CMP1]], i1 [[VEC_PHI2]], i1 false
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       if.then3:
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-VF1-IC2-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       if.end6:
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[TMP10]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[TMP12]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP22]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP23]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end6 ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %if.end6 ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %if.end6 ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  br i1 %cmp1, label %if.then3, label %if.end6
+
+if.then3:
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %load2 = load i32, ptr %arrayidx5, align 4
+  %inc = add nsw i32 %load2, 1
+  store i32 %inc, ptr %arrayidx5, align 4
+  br label %if.end6
+
+if.end6:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; int multi_user_cmp_branch_use_and_outside_bb_use(float* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   _Bool c;
+;   for (long long i = 0; i < n; i++) {
+;     c = a[i] < 0.0f;
+;     if (c) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;   return all ? c : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP10]], i32 [[TMP11]]
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP16]], i32 [[TMP17]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP18]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP13]], i32 [[TMP14]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP15]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = zext i1 %cmp1 to i32
+  %1 = select i1 %.any.0.off0, i32 2, i32 3
+  %2 = select i1 %all.0.off0., i32 %0, i32 %1
+  ret i32 %2
+}
+
+; Currently, this test case is not supported by the loop vectorizer.
+; int multi_user_cmp_fmax(float* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   float max = -INFINITY;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] > max;
+;     if (c) {
+;       max = a[i];
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;  return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_fmax(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %max.015 = phi float [ 0xFFF0000000000000, %entry ], [ %.max.0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp ogt float %load1, %max.015
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %.max.0 = select i1 %cmp1, float %load1, float %max.015
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; Currently, this test-case is not supported.
+; int multi_user_cmp_max(int* a, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   int max = 0;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] > max;
+;     if (c) {
+;       max = a[i];
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;   }
+;  return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %max.015 = phi i32 [ 0, %entry ], [ %.max.0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %load1, %max.015
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %.max.0 = select i1 %cmp1, i32 %load1, i32 %max.015
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; Currently, this test-case is not supported.
+; int multi_user_cmp_use_store_offset(float* a, int *b, long long n) {
+;   _Bool any = 0;
+;   _Bool all = 1;
+;   for (long long i = 0; i < n; i++) {
+;     _Bool c = a[i] < 0.0f;
+;     if (c) {
+;       any = 1;
+;     } else {
+;       all = 0;
+;     }
+;    b[i+c] = any;
+;   }
+;   return all ? 1 : any ? 2 : 3;
+; }
+define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-VF4-IC2-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-VF4-IC2-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-VF4-IC2-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-VF4-IC2-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP1]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-VF1-IC2-NEXT:    [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-VF1-IC2-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-VF1-IC2-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-VF1-IC2-NEXT:    store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP1]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %conv4 = zext i1 %cmp1 to i32
+  %n32 = trunc i64 %n to i32
+  %add = add nuw nsw i32 %conv4, %n32
+  %idxprom5 = zext nneg i32 %add to i64
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %idxprom5
+  store i32 %conv4, ptr %arrayidx6, align 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; Not vectorised: the compare instruction has an additional user (%0) inside the loop.
+define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP3]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %0 = sext i1 %cmp1 to i64
+  %1 = add i64 %0, %indvars.iv
+  %indvars.iv.next = add nuw nsw i64 %1, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %2 = select i1 %.any.0.off0, i32 2, i32 3
+  %3 = select i1 %all.0.off0., i32 1, i32 %2
+  ret i32 %3
+}
+
+; Not vectorised: non-recurrent select instruction %0 inside the loop.
+define i32 @multi_user_cmp_extra_select(ptr readonly %a, i64 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF4-IC2:       for.body:
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2:       exit:
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP2]]
+;
+; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF1-IC2:       exit:
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %0 = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %1 = select i1 %.any.0.off0, i32 2, i32 3
+  %2 = select i1 %all.0.off0., i32 1, i32 %1
+  ret i32 %2
+}

>From d067e52da869740a914e8d6a6cde2cf81383ee5d Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 18 Apr 2024 11:03:44 +0000
Subject: [PATCH 2/8] [LoopVectorize] LLVM fails to vectorise loops with
 multiple bool variables

This patch allows compare instructions that have multiple uses, inside and
outside the loop, to be considered for vectorisation. If we can prove that
each user of the compare instruction is a recurrent reduction, is used for
branching, or is outside the loop, then it is safe to vectorise.

This change allows the following loop to be vectorised:
int foo(float* a, int n) {
  _Bool any = 0;
  _Bool all = 1;
  for (int i = 0; i < n; i++) {
    if (a[i] < 0.0f) {
      any = 1;
    } else {
      all = 0;
    }
  }
  return all ? 1 : any ? 2 : 3;
}
---
 llvm/include/llvm/Analysis/IVDescriptors.h    |  20 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |  65 +-
 .../Vectorize/LoopVectorizationLegality.cpp   |  25 +
 .../test/Transforms/LoopVectorize/multicmp.ll | 881 +++++++++++++++---
 4 files changed, 860 insertions(+), 131 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 5c7b613ac48c4..f18ab500c4d9f 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -76,11 +76,11 @@ class RecurrenceDescriptor {
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
                        Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
-                       unsigned MinWidthCastToRecurTy)
+                       unsigned MinWidthCastToRecurTy, Instruction *Cmp)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
         IsSigned(Signed), IsOrdered(Ordered),
-        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
+        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), MultiCmp(Cmp) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -88,12 +88,13 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
     InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
-        : IsRecurrence(IsRecur), PatternLastInst(I),
-          RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
+        : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None),
+          ExactFPMathInst(ExactFP), Cmp(nullptr) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
+    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr,
+             Instruction *MultiCmp = nullptr)
         : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP) {}
+          ExactFPMathInst(ExactFP), Cmp(MultiCmp) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
@@ -105,6 +106,8 @@ class RecurrenceDescriptor {
 
     Instruction *getPatternInst() const { return PatternLastInst; }
 
+    Instruction *getMultiCmp() const { return Cmp; }
+
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
@@ -115,6 +118,8 @@ class RecurrenceDescriptor {
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
     Instruction *ExactFPMathInst;
+    // Multi-user compare instruction.
+    Instruction *Cmp;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -270,6 +275,8 @@ class RecurrenceDescriptor {
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
+  Instruction *getMultiCmp() const { return MultiCmp; }
+
   /// Reductions may store temporary or final result to an invariant address.
   /// If there is such a store in the loop then, after successfull run of
   /// AddReductionVar method, this field will be assigned the last met store.
@@ -300,6 +307,7 @@ class RecurrenceDescriptor {
   SmallPtrSet<Instruction *, 8> CastInsts;
   // The minimum width used by the recurrence.
   unsigned MinWidthCastToRecurrenceType;
+  Instruction *MultiCmp = nullptr;
 };
 
 /// A struct for saving information about induction variables.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 055f121e74341..811c4b75e0705 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -256,6 +256,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
+  Instruction *MultiCMP = nullptr;
   bool IsSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
@@ -400,6 +401,8 @@ bool RecurrenceDescriptor::AddReductionVar(
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
+    if (IsASelect)
+      MultiCMP = ReduxDesc.getMultiCmp();
 
     // A conditional reduction operation must only have 2 or less uses in
     // VisitedInsts.
@@ -597,7 +600,8 @@ bool RecurrenceDescriptor::AddReductionVar(
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
                           FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
+                          MultiCMP);
   RedDes = RD;
 
   return true;
@@ -635,14 +639,59 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  SelectInst *SI = dyn_cast<SelectInst>(I);
+  Instruction *Cmp = nullptr;
+
+  if (SI) {
+    bool HasOrigPhiUser = false;
+    bool SelectNonPHIUserInLoop = false;
+    auto Blocks = Loop->getBlocksVector();
+    for (User *U : SI->users()) {
+      Instruction *Inst = dyn_cast<Instruction>(U);
+      if (!Inst)
+        continue;
+      if (Inst == OrigPhi) {
+        HasOrigPhiUser = true;
+      } else {
+        if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
+            Blocks.end())
+          SelectNonPHIUserInLoop = true;
+      }
+    }
+    Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+    if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
+      bool IsSafeCMP = true;
+      for (User *U : Cmp->users()) {
+        Instruction *UInst = dyn_cast<Instruction>(U);
+        if (!UInst)
+          continue;
+        if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
+          if (!llvm::all_of(SI1->users(), [Blocks](User *USI) {
+                Instruction *Inst1 = dyn_cast<Instruction>(USI);
+                if (!Inst1 || (std::find(Blocks.begin(), Blocks.end(),
+                                         Inst1->getParent()) == Blocks.end() ||
+                               isa<PHINode>(Inst1)))
+                  return true;
+                return false;
+              }))
+            IsSafeCMP = false;
+        }
+        if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
+            std::find(Blocks.begin(), Blocks.end(), UInst->getParent()) !=
+                Blocks.end())
+          IsSafeCMP = false;
+      }
+      if (!IsSafeCMP)
+        Cmp = nullptr;
+    }
+  }
+
   // Only match select with single use cmp condition.
-  if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
-                         m_Value())))
+  if (!Cmp && !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
+                                 m_Value(), m_Value())))
     return InstDesc(false, I);
 
-  SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
-
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
@@ -656,8 +705,10 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   if (!Loop->isLoopInvariant(NonPhi))
     return InstDesc(false, I);
 
-  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
-                                                     : RecurKind::FAnyOf);
+  return InstDesc(I,
+                  isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+                                                  : RecurKind::FAnyOf,
+                  nullptr, Cmp);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 0c18c4e146de1..c8e8c011972d2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -787,6 +787,7 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *Header = TheLoop->getHeader();
+  DenseMap<Instruction *, unsigned> MultiCmpsRed;
 
   // For each block in the loop.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -830,6 +831,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
+          Instruction *Cmp = RedDes.getMultiCmp();
+          if (Cmp) {
+            if (MultiCmpsRed.contains(Cmp))
+              MultiCmpsRed[Cmp]++;
+            else
+              MultiCmpsRed[Cmp] = 1;
+          }
           continue;
         }
 
@@ -1045,6 +1053,23 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
+  // Make sure every in-loop select user of a multi-use compare is a reduction.
+  if (MultiCmpsRed.size() > 0) {
+    auto Blocks = TheLoop->getBlocksVector();
+    for (auto const &C : MultiCmpsRed) {
+      Instruction *Cmp = C.first;
+      unsigned Counter = 0;
+      for (User *U : Cmp->users()) {
+        SelectInst *Inst = dyn_cast<SelectInst>(U);
+        if (Inst && std::find(Blocks.begin(), Blocks.end(),
+                              Inst->getParent()) != Blocks.end())
+          Counter++;
+      }
+      if (Counter != C.second)
+        return false;
+    }
+  }
+
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
index 538b62ec06a33..17c7383afd8b0 100644
--- a/llvm/test/Transforms/LoopVectorize/multicmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -20,22 +20,55 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
 ; CHECK-NEXT:    ret i32 [[TMP10]]
@@ -43,22 +76,68 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
 ; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
@@ -66,22 +145,60 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
 ; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
@@ -124,22 +241,55 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
 ; CHECK-NEXT:    ret i32 [[TMP10]]
@@ -147,22 +297,68 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP15]]
 ; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP16]]
@@ -170,22 +366,60 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP4]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[TMP5]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP13]]
 ; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP14]]
@@ -231,11 +465,95 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP31:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP32]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -251,10 +569,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK:       if.end6:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
 ; CHECK-NEXT:    ret i32 [[TMP34]]
@@ -262,11 +580,151 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-VF4-IC2:       vector.memcheck:
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-VF4-IC2-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-VF4-IC2-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-VF4-IC2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI4]]
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI2]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-VF4-IC2:       pred.store.if:
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK-VF4-IC2:       pred.store.continue:
+; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP15]], [[PRED_STORE_IF]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
+; CHECK-VF4-IC2:       pred.store.if6:
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-VF4-IC2-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
+; CHECK-VF4-IC2:       pred.store.continue7:
+; CHECK-VF4-IC2-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP21]], [[PRED_STORE_IF6]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
+; CHECK-VF4-IC2:       pred.store.if8:
+; CHECK-VF4-IC2-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 2
+; CHECK-VF4-IC2-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-VF4-IC2-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
+; CHECK-VF4-IC2:       pred.store.continue9:
+; CHECK-VF4-IC2-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE7]] ], [ [[TMP27]], [[PRED_STORE_IF8]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP30]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
+; CHECK-VF4-IC2:       pred.store.if10:
+; CHECK-VF4-IC2-NEXT:    [[TMP31:%.*]] = add i64 [[INDEX]], 3
+; CHECK-VF4-IC2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
+; CHECK-VF4-IC2-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP34:%.*]] = add nsw i32 [[TMP33]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP34]], ptr [[TMP32]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
+; CHECK-VF4-IC2:       pred.store.continue11:
+; CHECK-VF4-IC2-NEXT:    [[TMP35:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE9]] ], [ [[TMP33]], [[PRED_STORE_IF10]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP36]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
+; CHECK-VF4-IC2:       pred.store.if12:
+; CHECK-VF4-IC2-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP39:%.*]] = add nsw i32 [[TMP38]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP39]], ptr [[TMP37]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
+; CHECK-VF4-IC2:       pred.store.continue13:
+; CHECK-VF4-IC2-NEXT:    [[TMP40:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE11]] ], [ [[TMP38]], [[PRED_STORE_IF12]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP41:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP41]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
+; CHECK-VF4-IC2:       pred.store.if14:
+; CHECK-VF4-IC2-NEXT:    [[TMP42:%.*]] = add i64 [[INDEX]], 5
+; CHECK-VF4-IC2-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP42]]
+; CHECK-VF4-IC2-NEXT:    [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP45:%.*]] = add nsw i32 [[TMP44]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP45]], ptr [[TMP43]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
+; CHECK-VF4-IC2:       pred.store.continue15:
+; CHECK-VF4-IC2-NEXT:    [[TMP46:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE13]] ], [ [[TMP44]], [[PRED_STORE_IF14]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP47:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP47]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]]
+; CHECK-VF4-IC2:       pred.store.if16:
+; CHECK-VF4-IC2-NEXT:    [[TMP48:%.*]] = add i64 [[INDEX]], 6
+; CHECK-VF4-IC2-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP48]]
+; CHECK-VF4-IC2-NEXT:    [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP51:%.*]] = add nsw i32 [[TMP50]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP51]], ptr [[TMP49]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
+; CHECK-VF4-IC2:       pred.store.continue17:
+; CHECK-VF4-IC2-NEXT:    [[TMP52:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE15]] ], [ [[TMP50]], [[PRED_STORE_IF16]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP53:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP53]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]]
+; CHECK-VF4-IC2:       pred.store.if18:
+; CHECK-VF4-IC2-NEXT:    [[TMP54:%.*]] = add i64 [[INDEX]], 7
+; CHECK-VF4-IC2-NEXT:    [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP54]]
+; CHECK-VF4-IC2-NEXT:    [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP57:%.*]] = add nsw i32 [[TMP56]], 1
+; CHECK-VF4-IC2-NEXT:    store i32 [[TMP57]], ptr [[TMP55]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF4-IC2-NEXT:    br label [[PRED_STORE_CONTINUE19]]
+; CHECK-VF4-IC2:       pred.store.continue19:
+; CHECK-VF4-IC2-NEXT:    [[TMP58:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE17]] ], [ [[TMP56]], [[PRED_STORE_IF18]] ]
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP11]], <4 x i1> [[TMP12]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP20]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP60]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP22:%.*]] = icmp ne <4 x i1> [[TMP9]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT23:%.*]] = select <4 x i1> [[RDX_SELECT_CMP22]], <4 x i1> [[TMP9]], <4 x i1> [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = icmp ne <4 x i1> [[RDX_SELECT23]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP24]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP61]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX26:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT25]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX26]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
 ; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
@@ -282,10 +740,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2:       if.end6:
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT25]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT21]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP62:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP63:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP62]]
 ; CHECK-VF4-IC2-NEXT:    ret i32 [[TMP63]]
@@ -293,30 +751,94 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-VF1-IC2:       vector.memcheck:
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-VF1-IC2-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-VF1-IC2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP4]], align 4, !alias.scope [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt float [[TMP6]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP8]], i1 true, i1 [[VEC_PHI4]]
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[TMP8]], i1 [[VEC_PHI2]], i1 false
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-VF1-IC2:       pred.store.if:
+; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = add nsw i32 [[TMP14]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[TMP15]], ptr [[TMP13]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK-VF1-IC2:       pred.store.continue:
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_STORE_IF]] ]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       pred.store.if5:
+; CHECK-VF1-IC2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = add nsw i32 [[TMP18]], 1
+; CHECK-VF1-IC2-NEXT:    store i32 [[TMP19]], ptr [[TMP17]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2:       pred.store.continue6:
+; CHECK-VF1-IC2-NEXT:    [[TMP20:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ]
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP11]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP11]], i1 [[TMP12]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne i1 [[TMP9]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[RDX_SELECT_CMP7]], i1 [[TMP9]], i1 [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX9:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[ENTRY]] ], [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-VF1-IC2:       for.body:
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX9]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[CMP1]], i1 true, i1 [[VEC_PHI4]]
-; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[CMP1]], i1 [[VEC_PHI2]], i1 false
-; CHECK-VF1-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
 ; CHECK-VF1-IC2:       if.then3:
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
 ; CHECK-VF1-IC2-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
-; CHECK-VF1-IC2-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK-VF1-IC2-NEXT:    br label [[IF_END6]]
 ; CHECK-VF1-IC2:       if.end6:
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[TMP10]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[TMP12]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT8]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP22:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP23:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP22]]
 ; CHECK-VF1-IC2-NEXT:    ret i32 [[TMP23]]
@@ -371,23 +893,57 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP9]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP10]], i32 [[TMP11]]
@@ -396,23 +952,70 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-VF4-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF4-IC2-NEXT:  entry:
+; CHECK-VF4-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4-IC2:       vector.ph:
+; CHECK-VF4-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-VF4-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4-IC2:       vector.body:
+; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF4-IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-VF4-IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
+; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF4-IC2:       middle.block:
+; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP14]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP15]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF4-IC2:       scalar.ph:
+; CHECK-VF4-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       for.body:
-; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF4-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF4-IC2:       exit:
-; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT6]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP16:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
 ; CHECK-VF4-IC2-NEXT:    [[TMP17:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP16]], i32 [[TMP17]]
@@ -421,23 +1024,61 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-VF1-IC2-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
 ; CHECK-VF1-IC2-NEXT:  entry:
+; CHECK-VF1-IC2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF1-IC2:       vector.ph:
+; CHECK-VF1-IC2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; CHECK-VF1-IC2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK-VF1-IC2:       vector.body:
+; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-VF1-IC2-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
+; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-VF1-IC2:       middle.block:
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-VF1-IC2:       scalar.ph:
+; CHECK-VF1-IC2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[BC_MERGE_RDX6:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       for.body:
-; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-VF1-IC2-NEXT:    [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1
-; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]]
-; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
+; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK-VF1-IC2:       exit:
-; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
 ; CHECK-VF1-IC2-NEXT:    [[TMP14:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
 ; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP13]], i32 [[TMP14]]
@@ -583,7 +1224,7 @@ exit:
 }
 
 ; Currently, this test-case is not supported.
-; int multi_user_cmp_fmax(int* a, long long n) {
+; int multi_user_cmp_max(int* a, long long n) {
 ;   _Bool any = 0;
 ;   _Bool all = 1;
 ;   int max = 0;
@@ -613,7 +1254,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
 ; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
@@ -638,7 +1279,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
 ; CHECK-VF4-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF4-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF4-IC2-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
 ; CHECK-VF4-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-VF4-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
@@ -663,7 +1304,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
 ; CHECK-VF1-IC2-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
 ; CHECK-VF1-IC2-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], i32 [[LOAD1]], i32 [[MAX_015]]
+; CHECK-VF1-IC2-NEXT:    [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
 ; CHECK-VF1-IC2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-VF1-IC2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
@@ -677,7 +1318,7 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
 entry:
   br label %for.body
 
-for.body:
+for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
@@ -687,17 +1328,21 @@ for.body:
   %cmp1 = icmp sgt i32 %load1, %max.015
   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
-  %.max.0 = select i1 %cmp1, i32 %load1, i32 %max.015
+  %.max.0 = tail call i32 @llvm.smax.i32(i32 %load1, i32 %max.015)
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next, %n
   br i1 %exitcond.not, label %exit, label %for.body
 
-exit:
-  %0 = select i1 %.any.0.off0, i32 2, i32 3
-  %1 = select i1 %all.0.off0., i32 1, i32 %0
+exit:                                             ; preds = %for.body
+  %.any.0.off0.lcssa = phi i1 [ %.any.0.off0, %for.body ]
+  %all.0.off0..lcssa = phi i1 [ %all.0.off0., %for.body ]
+  %0 = select i1 %.any.0.off0.lcssa, i32 2, i32 3
+  %1 = select i1 %all.0.off0..lcssa, i32 1, i32 %0
   ret i32 %1
 }
 
+declare i32 @llvm.smax.i32(i32, i32)
+
 ; Currently, this test-case is not supported.
 ; int multi_user_cmp_use_store_offset(float* a, int *b, long long n) {
 ;   _Bool any = 0;

>From c4f3f7c0a6e5a53a7bc255ed74954b915bab7c4f Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 22 Apr 2024 12:06:19 +0000
Subject: [PATCH 3/8] Resolved remarks.

---
 llvm/lib/Analysis/IVDescriptors.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 811c4b75e0705..8838f992cb6d9 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -639,9 +639,11 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  // Find the compare instruction that is associated with OrigPhi, i.e
+  // recurrent-reduction. And determine that SelectInst and CmpInst multiple
+  // instructions usage are safe to vectorise.
   SelectInst *SI = dyn_cast<SelectInst>(I);
   Instruction *Cmp = nullptr;
-
   if (SI) {
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
@@ -653,6 +655,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
+        // If we found SelectInstr usage in the loop then the reduction stops
+        // to be recurrent and it is not safe to procede further.
         if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
             Blocks.end())
           SelectNonPHIUserInLoop = true;
@@ -683,6 +687,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       }
       if (!IsSafeCMP)
         Cmp = nullptr;
+    } else {
+      Cmp = nullptr;
     }
   }
 

>From cf0bf918b9ad53ce623f385f3f6e252fe179347c Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 1 May 2024 07:00:37 +0000
Subject: [PATCH 4/8] Resolved remarks.

---
 llvm/include/llvm/Analysis/IVDescriptors.h    | 20 +++-------
 llvm/lib/Analysis/IVDescriptors.cpp           | 20 ++--------
 .../Vectorize/LoopVectorizationLegality.cpp   | 12 +++++-
 .../LoopVectorize/AArch64/select-multi-cmp.ll | 39 +++++++++++++++++++
 4 files changed, 59 insertions(+), 32 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index f18ab500c4d9f..5c7b613ac48c4 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -76,11 +76,11 @@ class RecurrenceDescriptor {
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
                        Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
-                       unsigned MinWidthCastToRecurTy, Instruction *Cmp)
+                       unsigned MinWidthCastToRecurTy)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
         IsSigned(Signed), IsOrdered(Ordered),
-        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), MultiCmp(Cmp) {
+        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -88,13 +88,12 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
     InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
-        : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None),
-          ExactFPMathInst(ExactFP), Cmp(nullptr) {}
+        : IsRecurrence(IsRecur), PatternLastInst(I),
+          RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr,
-             Instruction *MultiCmp = nullptr)
+    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
         : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP), Cmp(MultiCmp) {}
+          ExactFPMathInst(ExactFP) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
@@ -106,8 +105,6 @@ class RecurrenceDescriptor {
 
     Instruction *getPatternInst() const { return PatternLastInst; }
 
-    Instruction *getMultiCmp() const { return Cmp; }
-
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
@@ -118,8 +115,6 @@ class RecurrenceDescriptor {
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
     Instruction *ExactFPMathInst;
-    // Mult-user compare instruction.
-    Instruction *Cmp;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -275,8 +270,6 @@ class RecurrenceDescriptor {
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
-  Instruction *getMultiCmp() const { return MultiCmp; }
-
   /// Reductions may store temporary or final result to an invariant address.
   /// If there is such a store in the loop then, after successfull run of
   /// AddReductionVar method, this field will be assigned the last met store.
@@ -307,7 +300,6 @@ class RecurrenceDescriptor {
   SmallPtrSet<Instruction *, 8> CastInsts;
   // The minimum width used by the recurrence.
   unsigned MinWidthCastToRecurrenceType;
-  Instruction *MultiCmp = nullptr;
 };
 
 /// A struct for saving information about induction variables.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 8838f992cb6d9..061b06a71c763 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -256,7 +256,6 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
-  Instruction *MultiCMP = nullptr;
   bool IsSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
@@ -401,8 +400,6 @@ bool RecurrenceDescriptor::AddReductionVar(
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
-    if (IsASelect)
-      MultiCMP = ReduxDesc.getMultiCmp();
 
     // A conditional reduction operation must only have 2 or less uses in
     // VisitedInsts.
@@ -600,8 +597,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
                           FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
-                          MultiCMP);
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
   RedDes = RD;
 
   return true;
@@ -639,11 +635,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
-  // Find the compare instruction that is associated with OrigPhi, i.e
-  // recurrent-reduction. And determine that SelectInst and CmpInst multiple
-  // instructions usage are safe to vectorise.
   SelectInst *SI = dyn_cast<SelectInst>(I);
   Instruction *Cmp = nullptr;
+
   if (SI) {
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
@@ -655,8 +649,6 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
-        // If we found SelectInstr usage in the loop then the reduction stops
-        // to be recurrent and it is not safe to procede further.
         if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
             Blocks.end())
           SelectNonPHIUserInLoop = true;
@@ -687,8 +679,6 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       }
       if (!IsSafeCMP)
         Cmp = nullptr;
-    } else {
-      Cmp = nullptr;
     }
   }
 
@@ -711,10 +701,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   if (!Loop->isLoopInvariant(NonPhi))
     return InstDesc(false, I);
 
-  return InstDesc(I,
-                  isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
-                                                  : RecurKind::FAnyOf,
-                  nullptr, Cmp);
+  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+                                                     : RecurKind::FAnyOf);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index c8e8c011972d2..fe0b29bfe2130 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -831,8 +831,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
-          Instruction *Cmp = RedDes.getMultiCmp();
-          if (Cmp) {
+          CmpInst *Cmp = nullptr;
+          for (Value *V :
+               {Phi->getIncomingValue(0), Phi->getIncomingValue(1)}) {
+            if (Instruction *SI = dyn_cast<SelectInst>(V))
+              Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+          }
+          if (Cmp && !Cmp->hasOneUse()) {
+            RecurKind Kind = RedDes.getRecurrenceKind();
+            assert((Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf) &&
+                   "Unexpected type of recurrence");
             if (MultiCmpsRed.contains(Cmp))
               MultiCmpsRed[Cmp]++;
             else
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
new file mode 100644
index 0000000000000..483240770e87b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %load1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %exitcond.not, label %exit, label %for.body
+entry:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}

>From d136dcab1d79cd116f4641f5090a13e9c6758bbf Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 1 May 2024 21:38:33 +0000
Subject: [PATCH 5/8] Change multi_user_cmp_max() to use the llvm.smax.i32
 intrinsic, replace std::find(Blocks.begin(), Blocks.end(), ...) with
 Loop->contains(Inst->getParent()), and add comments.

---
 llvm/lib/Analysis/IVDescriptors.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 061b06a71c763..a287ec900cadc 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -639,9 +639,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   Instruction *Cmp = nullptr;
 
   if (SI) {
+    // Check that SelectInst is related to the this PHI reduction.
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
-    auto Blocks = Loop->getBlocksVector();
     for (User *U : SI->users()) {
       Instruction *Inst = dyn_cast<Instruction>(U);
       if (!Inst)
@@ -649,12 +649,12 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
-        if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
-            Blocks.end())
+        if (Loop->contains(Inst->getParent()))
           SelectNonPHIUserInLoop = true;
       }
     }
     Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+    // Checking the current CmpInst is safe as a recurrent reduction.
     if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
       bool IsSafeCMP = true;
       for (User *U : Cmp->users()) {
@@ -662,19 +662,17 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
         if (!UInst)
           continue;
         if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
-          if (!llvm::all_of(SI1->users(), [Blocks](User *USI) {
+          if (!llvm::all_of(SI1->users(), [Loop](User *USI) {
                 Instruction *Inst1 = dyn_cast<Instruction>(USI);
-                if (!Inst1 || (std::find(Blocks.begin(), Blocks.end(),
-                                         Inst1->getParent()) == Blocks.end() ||
-                               isa<PHINode>(Inst1)))
+                if (!Inst1 || !Loop->contains(Inst1->getParent()) ||
+                    isa<PHINode>(Inst1))
                   return true;
                 return false;
               }))
             IsSafeCMP = false;
         }
         if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
-            std::find(Blocks.begin(), Blocks.end(), UInst->getParent()) !=
-                Blocks.end())
+            Loop->contains(UInst->getParent()))
           IsSafeCMP = false;
       }
       if (!IsSafeCMP)

>From 2f76ef9406d73d64d2acc061a4358fbff2e24575 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 2 May 2024 15:59:22 +0000
Subject: [PATCH 6/8] Fix a compile-time failure found while testing the
 proposed change.

---
 .../lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index fe0b29bfe2130..1888f022b8006 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -837,10 +837,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
             if (Instruction *SI = dyn_cast<SelectInst>(V))
               Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
           }
-          if (Cmp && !Cmp->hasOneUse()) {
-            RecurKind Kind = RedDes.getRecurrenceKind();
-            assert((Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf) &&
-                   "Unexpected type of recurrence");
+          RecurKind Kind = RedDes.getRecurrenceKind();
+          if (Cmp && !Cmp->hasOneUse() &&
+              (Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf)) {
             if (MultiCmpsRed.contains(Cmp))
               MultiCmpsRed[Cmp]++;
             else

>From 7e650b62253289996156205e31b73686a0513649 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 16 May 2024 11:19:26 +0000
Subject: [PATCH 7/8] Update tests.

---
 llvm/lib/Analysis/IVDescriptors.cpp           |  52 +---
 .../Vectorize/LoopVectorizationLegality.cpp   |  32 ---
 .../LoopVectorize/AArch64/select-costs.ll     |  39 ++-
 .../LoopVectorize/AArch64/select-multi-cmp.ll |  39 ---
 .../{multicmp.ll => select-cmp-multiuse.ll}   | 268 ++++++++++--------
 .../Transforms/LoopVectorize/select-cmp.ll    |  27 --
 6 files changed, 183 insertions(+), 274 deletions(-)
 delete mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
 rename llvm/test/Transforms/LoopVectorize/{multicmp.ll => select-cmp-multiuse.ll} (89%)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index a287ec900cadc..9a711c204210d 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -635,57 +635,13 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
-  SelectInst *SI = dyn_cast<SelectInst>(I);
-  Instruction *Cmp = nullptr;
-
-  if (SI) {
-    // Check that SelectInst is related to the this PHI reduction.
-    bool HasOrigPhiUser = false;
-    bool SelectNonPHIUserInLoop = false;
-    for (User *U : SI->users()) {
-      Instruction *Inst = dyn_cast<Instruction>(U);
-      if (!Inst)
-        continue;
-      if (Inst == OrigPhi) {
-        HasOrigPhiUser = true;
-      } else {
-        if (Loop->contains(Inst->getParent()))
-          SelectNonPHIUserInLoop = true;
-      }
-    }
-    Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
-    // Checking the current CmpInst is safe as a recurrent reduction.
-    if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
-      bool IsSafeCMP = true;
-      for (User *U : Cmp->users()) {
-        Instruction *UInst = dyn_cast<Instruction>(U);
-        if (!UInst)
-          continue;
-        if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
-          if (!llvm::all_of(SI1->users(), [Loop](User *USI) {
-                Instruction *Inst1 = dyn_cast<Instruction>(USI);
-                if (!Inst1 || !Loop->contains(Inst1->getParent()) ||
-                    isa<PHINode>(Inst1))
-                  return true;
-                return false;
-              }))
-            IsSafeCMP = false;
-        }
-        if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
-            Loop->contains(UInst->getParent()))
-          IsSafeCMP = false;
-      }
-      if (!IsSafeCMP)
-        Cmp = nullptr;
-    }
-  }
-
-  // Only match select with single use cmp condition.
-  if (!Cmp && !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
-                                 m_Value(), m_Value())))
+  if (!match(I,
+             m_Select(m_Cmp(Pred, m_Value(), m_Value()), m_Value(), m_Value())))
     return InstDesc(false, I);
 
+  SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
+
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 1888f022b8006..0c18c4e146de1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -787,7 +787,6 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *Header = TheLoop->getHeader();
-  DenseMap<Instruction *, unsigned> MultiCmpsRed;
 
   // For each block in the loop.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -831,20 +830,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
-          CmpInst *Cmp = nullptr;
-          for (Value *V :
-               {Phi->getIncomingValue(0), Phi->getIncomingValue(1)}) {
-            if (Instruction *SI = dyn_cast<SelectInst>(V))
-              Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
-          }
-          RecurKind Kind = RedDes.getRecurrenceKind();
-          if (Cmp && !Cmp->hasOneUse() &&
-              (Kind == RecurKind::IAnyOf || Kind == RecurKind::FAnyOf)) {
-            if (MultiCmpsRed.contains(Cmp))
-              MultiCmpsRed[Cmp]++;
-            else
-              MultiCmpsRed[Cmp] = 1;
-          }
           continue;
         }
 
@@ -1060,23 +1045,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
-  // Make sure that all compare instruction users are recurrent if in loop's BB.
-  if (MultiCmpsRed.size() > 0) {
-    auto Blocks = TheLoop->getBlocksVector();
-    for (auto const &C : MultiCmpsRed) {
-      Instruction *Cmp = C.first;
-      unsigned Counter = 0;
-      for (User *U : Cmp->users()) {
-        SelectInst *Inst = dyn_cast<SelectInst>(U);
-        if (Inst && std::find(Blocks.begin(), Blocks.end(),
-                              Inst->getParent()) != Blocks.end())
-          Counter++;
-      }
-      if (Counter != C.second)
-        return false;
-    }
-  }
-
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
index 1cde8b9bad6fc..25342040aad36 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
@@ -13,10 +13,6 @@ define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
 
-; CHECK-LABEL: define void @selects_1(
-; CHECK:       vector.body:
-; CHECK:         select <4 x i1>
-
 entry:
   %cmp26 = icmp sgt i32 %N, 0
   br i1 %cmp26, label %for.body.preheader, label %for.cond.cleanup
@@ -47,3 +43,38 @@ for.cond.cleanup.loopexit:                        ; preds = %for.body
 for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
   ret void
 }
+
+define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
+; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+entry:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %load1 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %load1, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %exit, label %for.body
+
+exit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  ret i32 %1
+}
+
+; CHECK-LABEL: define void @selects_1(
+; CHECK:       vector.body:
+; CHECK:         select <4 x i1>
+
+; CHECK-LABEL: define i32 @multi_user_cmp(
+; CHECK:       vector.body:
+; CHECK:         %index = phi i64
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
deleted file mode 100644
index 483240770e87b..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-multi-cmp.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
-
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
-target triple = "aarch64-unknown-linux-gnu"
-
-define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
-; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %load1 = load float, ptr %arrayidx, align 4
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK-NEXT: LV: Found an estimated cost of 1 for VF 4 For instruction:   %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
-; CHECK-NEXT: LV: Found an estimated cost of 0 for VF 4 For instruction:   br i1 %exitcond.not, label %exit, label %for.body
-entry:
-  %wide.trip.count = zext nneg i32 %n to i64
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %all.0.off010 = phi i1 [ true, %entry ], [ %all.0.off0., %for.body ]
-  %any.0.off09 = phi i1 [ false, %entry ], [ %.any.0.off0, %for.body ]
-  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
-  %load1 = load float, ptr %arrayidx, align 4
-  %cmp1 = fcmp olt float %load1, 0.000000e+00
-  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
-  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
-  br i1 %exitcond.not, label %exit, label %for.body
-
-exit:
-  %0 = select i1 %.any.0.off0, i32 2, i32 3
-  %1 = select i1 %all.0.off0., i32 1, i32 %0
-  ret i32 %1
-}
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
similarity index 89%
rename from llvm/test/Transforms/LoopVectorize/multicmp.ll
rename to llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index 17c7383afd8b0..e826d62dcb29d 100644
--- a/llvm/test/Transforms/LoopVectorize/multicmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -28,25 +28,26 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP12]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP8]]
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP11]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -84,10 +85,10 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -98,24 +99,24 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP17]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP19]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP13]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP20]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP19]], [[TMP17]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP14]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP18]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -153,10 +154,10 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -165,18 +166,22 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
@@ -249,25 +254,26 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP7:%.*]] = freeze i1 [[TMP12]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP11:%.*]] = freeze i1 [[TMP8]]
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP11]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -305,10 +311,10 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -319,24 +325,24 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP17]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP19]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP13]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP13]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP20]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP19]], [[TMP17]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP14]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP18:%.*]] = freeze i1 [[TMP14]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP18]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -374,10 +380,10 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -386,18 +392,22 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = icmp slt i32 [[TMP4]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[TMP5]], 0
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP15:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
@@ -481,15 +491,16 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP9:%.*]], [[PRED_STORE_CONTINUE8]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP5]] = or <4 x i1> [[VEC_PHI2]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -537,12 +548,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP31:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP36:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT:    [[TMP31:%.*]] = freeze i1 [[TMP36]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP32]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP35:%.*]] = freeze i1 [[TMP32]]
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP35]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -596,10 +607,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE19:%.*]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE19]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP64:%.*]], [[PRED_STORE_CONTINUE19]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_STORE_CONTINUE19]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
@@ -610,10 +621,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI4]]
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI2]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP64]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP66]] = or <4 x i1> [[VEC_PHI6]], [[TMP8]]
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10]] = or <4 x i1> [[VEC_PHI4]], [[TMP12]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF4-IC2:       pred.store.if:
@@ -704,16 +717,14 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF4-IC2-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP59]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP11]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP11]], <4 x i1> [[TMP12]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP20:%.*]] = or <4 x i1> [[TMP10]], [[TMP9]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP60:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP20]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP60]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP22:%.*]] = icmp ne <4 x i1> [[TMP9]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT23:%.*]] = select <4 x i1> [[RDX_SELECT_CMP22]], <4 x i1> [[TMP9]], <4 x i1> [[TMP10]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = icmp ne <4 x i1> [[RDX_SELECT23]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP67:%.*]] = freeze i1 [[TMP60]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT21:%.*]] = select i1 [[TMP67]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP24:%.*]] = or <4 x i1> [[TMP66]], [[TMP64]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP61:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP24]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP61]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP65:%.*]] = freeze i1 [[TMP61]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT25:%.*]] = select i1 [[TMP65]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -767,10 +778,10 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_STORE_CONTINUE6]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI6:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_STORE_CONTINUE6]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
@@ -779,10 +790,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP4]], align 4, !alias.scope [[META6]]
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP8:%.*]] = fcmp olt float [[TMP6]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP8]], i1 true, i1 [[VEC_PHI4]]
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP12]] = select i1 [[TMP8]], i1 [[VEC_PHI2]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP26]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP27]] = or i1 [[VEC_PHI6]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = xor i1 [[TMP8]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10]] = or i1 [[VEC_PHI4]], [[TMP12]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK-VF1-IC2:       pred.store.if:
 ; CHECK-VF1-IC2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
@@ -805,10 +818,12 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
 ; CHECK-VF1-IC2-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP11]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP11]], i1 [[TMP12]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne i1 [[TMP9]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[RDX_SELECT_CMP7]], i1 [[TMP9]], i1 [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP10]], [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[TMP24:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP24]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX7:%.*]] = or i1 [[TMP27]], [[TMP26]]
+; CHECK-VF1-IC2-NEXT:    [[TMP25:%.*]] = freeze i1 [[BIN_RDX7]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select i1 [[TMP25]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
@@ -901,26 +916,27 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
-; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[RDX_SELECT_CMP2]] = or <4 x i1> [[VEC_PHI2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
-; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP13]]
 ; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i1 false, i1 true
-; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
-; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP9]], i1 true, i1 false
+; CHECK-NEXT:    [[TMP14:%.*]] = freeze i1 [[TMP9]]
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP14]], i1 true, i1 false
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -960,10 +976,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF4-IC2:       vector.body:
 ; CHECK-VF4-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF4-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF4-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF4-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -974,25 +990,25 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF4-IC2-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-VF4-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-VF4-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP8]] = select <4 x i1> [[TMP6]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
-; CHECK-VF4-IC2-NEXT:    [[TMP9]] = select <4 x i1> [[TMP7]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI3]]
-; CHECK-VF4-IC2-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i1> [[VEC_PHI1]], <4 x i1> zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP20]] = or <4 x i1> [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF4-IC2-NEXT:    [[TMP21]] = or <4 x i1> [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF4-IC2-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[TMP8]] = or <4 x i1> [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF4-IC2-NEXT:    [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF4-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-VF4-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF4-IC2:       middle.block:
 ; CHECK-VF4-IC2-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i1> [[TMP10]], <4 x i1> [[TMP11]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = icmp ne <4 x i1> [[RDX_SELECT]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP5:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP5]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP14]], i1 false, i1 true
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP7:%.*]] = icmp ne <4 x i1> [[TMP8]], zeroinitializer
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT8:%.*]] = select <4 x i1> [[RDX_SELECT_CMP7]], <4 x i1> [[TMP8]], <4 x i1> [[TMP9]]
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[RDX_SELECT8]], zeroinitializer
+; CHECK-VF4-IC2-NEXT:    [[TMP22:%.*]] = freeze i1 [[TMP14]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT6:%.*]] = select i1 [[TMP22]], i1 false, i1 true
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT_CMP9:%.*]] = or <4 x i1> [[TMP21]], [[TMP20]]
 ; CHECK-VF4-IC2-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
-; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP15]], i1 true, i1 false
+; CHECK-VF4-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[TMP15]]
+; CHECK-VF4-IC2-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP19]], i1 true, i1 false
 ; CHECK-VF4-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF4-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF4-IC2:       scalar.ph:
@@ -1032,10 +1048,10 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK-VF1-IC2:       vector.body:
 ; CHECK-VF1-IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-VF1-IC2-NEXT:    [[VEC_PHI1:%.*]] = phi i1 [ true, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[VEC_PHI3:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI4:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF1-IC2-NEXT:    [[VEC_PHI5:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-VF1-IC2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-VF1-IC2-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-VF1-IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
@@ -1044,18 +1060,22 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
 ; CHECK-VF1-IC2-NEXT:    [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
 ; CHECK-VF1-IC2-NEXT:    [[TMP6:%.*]] = fcmp olt float [[TMP4]], 0.000000e+00
 ; CHECK-VF1-IC2-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP5]], 0.000000e+00
-; CHECK-VF1-IC2-NEXT:    [[TMP8]] = select i1 [[TMP6]], i1 true, i1 [[VEC_PHI2]]
-; CHECK-VF1-IC2-NEXT:    [[TMP9]] = select i1 [[TMP7]], i1 true, i1 [[VEC_PHI3]]
-; CHECK-VF1-IC2-NEXT:    [[TMP10]] = select i1 [[TMP6]], i1 [[VEC_PHI]], i1 false
-; CHECK-VF1-IC2-NEXT:    [[TMP11]] = select i1 [[TMP7]], i1 [[VEC_PHI1]], i1 false
+; CHECK-VF1-IC2-NEXT:    [[TMP17]] = or i1 [[VEC_PHI4]], [[TMP6]]
+; CHECK-VF1-IC2-NEXT:    [[TMP18]] = or i1 [[VEC_PHI5]], [[TMP7]]
+; CHECK-VF1-IC2-NEXT:    [[TMP10:%.*]] = xor i1 [[TMP6]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP11:%.*]] = xor i1 [[TMP7]], true
+; CHECK-VF1-IC2-NEXT:    [[TMP8]] = or i1 [[VEC_PHI2]], [[TMP10]]
+; CHECK-VF1-IC2-NEXT:    [[TMP9]] = or i1 [[VEC_PHI3]], [[TMP11]]
 ; CHECK-VF1-IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-VF1-IC2-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK-VF1-IC2:       middle.block:
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i1 [[TMP10]], true
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i1 [[TMP10]], i1 [[TMP11]]
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT_CMP4:%.*]] = icmp ne i1 [[TMP8]], false
-; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i1 [[TMP8]], i1 [[TMP9]]
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX:%.*]] = or i1 [[TMP9]], [[TMP8]]
+; CHECK-VF1-IC2-NEXT:    [[TMP19:%.*]] = freeze i1 [[BIN_RDX]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i1 false, i1 true
+; CHECK-VF1-IC2-NEXT:    [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[TMP17]]
+; CHECK-VF1-IC2-NEXT:    [[TMP16:%.*]] = freeze i1 [[BIN_RDX4]]
+; CHECK-VF1-IC2-NEXT:    [[RDX_SELECT5:%.*]] = select i1 [[TMP16]], i1 true, i1 false
 ; CHECK-VF1-IC2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-VF1-IC2-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK-VF1-IC2:       scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 993b56a05207b..da0f7283d80d5 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -272,33 +272,6 @@ exit:                                     ; preds = %for.body
 }
 
 
-; We don't support select/cmp reduction patterns where there is more than one
-; use of the icmp/fcmp.
-define i32 @select_const_i32_from_icmp_mul_use(ptr nocapture readonly %v1, ptr %v2, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_icmp_mul_use
-; CHECK-NOT: vector.body
-entry:
-  br label %for.body
-
-for.body:                                      ; preds = %entry, %for.body
-  %0 = phi i64 [ 0, %entry ], [ %8, %for.body ]
-  %1 = phi i32 [ 3, %entry ], [ %6, %for.body ]
-  %2 = phi i32 [ 0, %entry ], [ %7, %for.body ]
-  %3 = getelementptr inbounds i32, ptr %v1, i64 %0
-  %4 = load i32, ptr %3, align 4
-  %5 = icmp eq i32 %4, 3
-  %6 = select i1 %5, i32 %1, i32 7
-  %7 = zext i1 %5 to i32
-  %8 = add nuw nsw i64 %0, 1
-  %9 = icmp eq i64 %8, %n
-  br i1 %9, label %exit, label %for.body
-
-exit:                                     ; preds = %for.body
-  store i32 %7, ptr %v2, align 4
-  ret i32 %6
-}
-
-
 ; We don't support selecting loop-variant values.
 define i32 @select_variant_i32_from_icmp(ptr nocapture readonly %v1, ptr nocapture readonly %v2, i64 %n) {
 ; CHECK-LABEL: @select_variant_i32_from_icmp

>From 356ab983dcd1e26e91b5598bad14112a5d3fbed3 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Wed, 26 Jun 2024 11:45:31 +0000
Subject: [PATCH 8/8] Further test update

---
 .../LoopVectorize/AArch64/select-costs.ll          | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
index 25342040aad36..c64bbc2107238 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-costs.ll
@@ -1,10 +1,11 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S 2>&1 | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios5.0.0"
 
 define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
+; CHECK: LV: Checking a loop in 'selects_1'
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond = select i1 %cmp1, i32 10, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
@@ -12,6 +13,7 @@ define void @selects_1(ptr nocapture %dst, i32 %A, i32 %B, i32 %C, i32 %N) {
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond = select i1 %cmp1, i32 10, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond6 = select i1 %cmp2, i32 30, i32 %and
 ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction:   %cond11 = select i1 %cmp7, i32 %cond, i32 %cond6
+; CHECK: LV: Selecting VF: 4
 
 entry:
   %cmp26 = icmp sgt i32 %N, 0
@@ -45,9 +47,11 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup.lo
 }
 
 define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK: LV: Checking a loop in 'multi_user_cmp'
 ; CHECK: LV: Found an estimated cost of 4 for VF 16 For instruction:   %cmp1 = fcmp olt float %load1, 0.000000e+00
 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
 ; CHECK: LV: Found an estimated cost of 1 for VF 16 For instruction:   %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+; CHECK: LV: Selecting VF: 16.
 entry:
   %wide.trip.count = zext nneg i32 %n to i64
   br label %for.body
@@ -70,11 +74,3 @@ exit:
   %1 = select i1 %all.0.off0., i32 1, i32 %0
   ret i32 %1
 }
-
-; CHECK-LABEL: define void @selects_1(
-; CHECK:       vector.body:
-; CHECK:         select <4 x i1>
-
-; CHECK-LABEL: define i32 @multi_user_cmp(
-; CHECK:       vector.body:
-; CHECK:         %index = phi i64



More information about the llvm-commits mailing list