[llvm] [LoopVectorize] LLVM fails to vectorise loops with multi-bool variables (PR #89226)

Dinar Temirbulatov via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 22 05:10:41 PDT 2024


https://github.com/dtemirbulatov updated https://github.com/llvm/llvm-project/pull/89226

>From 6bbc7ec56c5afa1a25c8b871508cf3c52e8768de Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 18 Apr 2024 11:03:44 +0000
Subject: [PATCH 1/2] [LoopVectorize] LLVM fails to vectorise loops with
 multiple bool variables

This patch allows compare instructions in the loop to have multiple uses,
both inside and outside the loop. If we can prove that every user of the
compare instruction is either a recurrent reduction, a branch, or an
instruction outside the loop, then it is safe to vectorise.

This change makes it possible to vectorise this loop:
int foo(float* a, int n) {
  _Bool any = 0;
  _Bool all = 1;
  for (int i = 0; i < n; i++) {
    if (a[i] < 0.0f) {
      any = 1;
    } else {
      all = 0;
    }
  }
  return all ? 1 : any ? 2 : 3;
}
---
 llvm/include/llvm/Analysis/IVDescriptors.h    |  20 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |  65 +-
 .../Vectorize/LoopVectorizationLegality.cpp   |  25 +
 .../test/Transforms/LoopVectorize/multicmp.ll | 794 ++++++++++++++++++
 4 files changed, 891 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/multicmp.ll

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 5c7b613ac48c40..f18ab500c4d9fa 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -76,11 +76,11 @@ class RecurrenceDescriptor {
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
                        Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
-                       unsigned MinWidthCastToRecurTy)
+                       unsigned MinWidthCastToRecurTy, Instruction *Cmp)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
         IsSigned(Signed), IsOrdered(Ordered),
-        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
+        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), MultiCmp(Cmp) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -88,12 +88,13 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
     InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
-        : IsRecurrence(IsRecur), PatternLastInst(I),
-          RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
+        : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None),
+          ExactFPMathInst(ExactFP), Cmp(nullptr) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
+    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr,
+             Instruction *MultiCmp = nullptr)
         : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP) {}
+          ExactFPMathInst(ExactFP), Cmp(MultiCmp) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
@@ -105,6 +106,8 @@ class RecurrenceDescriptor {
 
     Instruction *getPatternInst() const { return PatternLastInst; }
 
+    Instruction *getMultiCmp() const { return Cmp; }
+
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
@@ -115,6 +118,8 @@ class RecurrenceDescriptor {
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
     Instruction *ExactFPMathInst;
+    // Multi-user compare instruction.
+    Instruction *Cmp;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -270,6 +275,8 @@ class RecurrenceDescriptor {
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
+  Instruction *getMultiCmp() const { return MultiCmp; }
+
   /// Reductions may store temporary or final result to an invariant address.
   /// If there is such a store in the loop then, after successfull run of
   /// AddReductionVar method, this field will be assigned the last met store.
@@ -300,6 +307,7 @@ class RecurrenceDescriptor {
   SmallPtrSet<Instruction *, 8> CastInsts;
   // The minimum width used by the recurrence.
   unsigned MinWidthCastToRecurrenceType;
+  Instruction *MultiCmp = nullptr;
 };
 
 /// A struct for saving information about induction variables.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 055f121e743411..811c4b75e07052 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -256,6 +256,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
+  Instruction *MultiCMP = nullptr;
   bool IsSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
@@ -400,6 +401,8 @@ bool RecurrenceDescriptor::AddReductionVar(
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
+    if (IsASelect)
+      MultiCMP = ReduxDesc.getMultiCmp();
 
     // A conditional reduction operation must only have 2 or less uses in
     // VisitedInsts.
@@ -597,7 +600,8 @@ bool RecurrenceDescriptor::AddReductionVar(
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
                           FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
+                          MultiCMP);
   RedDes = RD;
 
   return true;
@@ -635,14 +639,59 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  SelectInst *SI = dyn_cast<SelectInst>(I);
+  Instruction *Cmp = nullptr;
+
+  if (SI) {
+    bool HasOrigPhiUser = false;
+    bool SelectNonPHIUserInLoop = false;
+    auto Blocks = Loop->getBlocksVector();
+    for (User *U : SI->users()) {
+      Instruction *Inst = dyn_cast<Instruction>(U);
+      if (!Inst)
+        continue;
+      if (Inst == OrigPhi) {
+        HasOrigPhiUser = true;
+      } else {
+        if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
+            Blocks.end())
+          SelectNonPHIUserInLoop = true;
+      }
+    }
+    Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+    if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
+      bool IsSafeCMP = true;
+      for (User *U : Cmp->users()) {
+        Instruction *UInst = dyn_cast<Instruction>(U);
+        if (!UInst)
+          continue;
+        if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
+          if (!llvm::all_of(SI1->users(), [Blocks](User *USI) {
+                Instruction *Inst1 = dyn_cast<Instruction>(USI);
+                if (!Inst1 || (std::find(Blocks.begin(), Blocks.end(),
+                                         Inst1->getParent()) == Blocks.end() ||
+                               isa<PHINode>(Inst1)))
+                  return true;
+                return false;
+              }))
+            IsSafeCMP = false;
+        }
+        if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
+            std::find(Blocks.begin(), Blocks.end(), UInst->getParent()) !=
+                Blocks.end())
+          IsSafeCMP = false;
+      }
+      if (!IsSafeCMP)
+        Cmp = nullptr;
+    }
+  }
+
   // Only match select with single use cmp condition.
-  if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
-                         m_Value())))
+  if (!Cmp && !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
+                                 m_Value(), m_Value())))
     return InstDesc(false, I);
 
-  SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
-
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
@@ -656,8 +705,10 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   if (!Loop->isLoopInvariant(NonPhi))
     return InstDesc(false, I);
 
-  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
-                                                     : RecurKind::FAnyOf);
+  return InstDesc(I,
+                  isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+                                                  : RecurKind::FAnyOf,
+                  nullptr, Cmp);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index d33743e74cbe31..257be42be0d4f8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -787,6 +787,7 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *Header = TheLoop->getHeader();
+  DenseMap<Instruction *, unsigned> MultiCmpsRed;
 
   // For each block in the loop.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -830,6 +831,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
+          Instruction *Cmp = RedDes.getMultiCmp();
+          if (Cmp) {
+            if (MultiCmpsRed.contains(Cmp))
+              MultiCmpsRed[Cmp]++;
+            else
+              MultiCmpsRed[Cmp] = 1;
+          }
           continue;
         }
 
@@ -1045,6 +1053,23 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
+  // Make sure that all users of the compare instruction inside the loop's
+  // blocks are recurrences.
+  if (MultiCmpsRed.size() > 0) {
+    auto Blocks = TheLoop->getBlocksVector();
+    for (auto const &C : MultiCmpsRed) {
+      Instruction *Cmp = C.first;
+      unsigned Counter = 0;
+      for (User *U : Cmp->users()) {
+        SelectInst *Inst = dyn_cast<SelectInst>(U);
+        if (Inst && std::find(Blocks.begin(), Blocks.end(),
+                              Inst->getParent()) != Blocks.end())
+          Counter++;
+      }
+      if (Counter != C.second)
+        return false;
+    }
+  }
+
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
new file mode 100644
index 00000000000000..30ef11b8b4b309
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -0,0 +1,794 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
+
+define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP11]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_int(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 0
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %2, 0
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP17:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP17]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI2]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP31:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 true, i1 false
+; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP32]], i1 false, i1 true
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[IF_END6:%.*]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP34]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ANY_0_OFF020:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ALL_0_OFF018:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP35]], 0.000000e+00
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF018]], i1 false
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF020]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP36]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[IF_END6]]
+; CHECK:       if.end6:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+;
+entry:
+  %cmp17 = icmp sgt i32 %n, 0
+  br i1 %cmp17, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %if.end6 ]
+  %any.0.off020 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %if.end6 ]
+  %all.0.off018 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %if.end6 ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off018, i1 false
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off020
+  br i1 %cmp1, label %if.then3, label %if.end6
+
+if.then3:
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %3 = load i32, ptr %arrayidx5, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %arrayidx5, align 4
+  br label %if.end6
+
+if.end6:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, ptr %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META13:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP32]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP33]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1:%.*]], [[FOR_INC:%.*]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_INC]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP34:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-NEXT:    [[TMP35:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP34]], i32 [[TMP35]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP36]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ALL_0_OFF022:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ANY_0_OFF021:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP37:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1]] = fcmp olt float [[TMP37]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF021]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF022]], i1 false
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[FOR_INC]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP38]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+;
+entry:
+  %cmp20 = icmp sgt i32 %n, 0
+  br i1 %cmp20, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = zext i1 %cmp1 to i32
+  %1 = select i1 %.any.0.off0, i32 2, i32 3
+  %2 = select i1 %all.0.off0., i32 %0, i32 %1
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 0, %entry ], [ %2, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
+  %all.0.off022 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.inc ]
+  %any.0.off021 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.inc ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %3 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %3, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off021
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off022, i1 false
+  br i1 %cmp1, label %if.then3, label %for.inc
+
+if.then3:
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %4 = load i32, ptr %arrayidx5, align 4
+  %inc = add nsw i32 %4, 1
+  store i32 %inc, ptr %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_fmax(ptr readonly %a, ptr readnone %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr readnone [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP13]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF017:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[FOR_BODY_PREHEADER]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF014:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[TMP2]], [[MAX_015]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF014]], i1 false
+; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[TMP2]], float [[MAX_015]]
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF017]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp13 = icmp sgt i32 %n, 0
+  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %any.0.off017 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %max.015 = phi float [ 0xFFF0000000000000, %for.body.preheader ], [ %.max.0, %for.body ]
+  %all.0.off014 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp ogt float %2, %max.015
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off014, i1 false
+  %.max.0 = select i1 %cmp1, float %2, float %max.015
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off017
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP19:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF022:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF020:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP2]], 0.000000e+00
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF020]], i1 false
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF022]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[DOTANY_0_OFF0]] to i32
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N]]
+; CHECK-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp19 = icmp sgt i32 %n, 0
+  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %any.0.off022 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %all.0.off020 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off020, i1 false
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off022
+  %conv = zext i1 %.any.0.off0 to i32
+  %conv4 = zext i1 %cmp1 to i32
+  %add = add nuw nsw i32 %conv4, %n
+  %idxprom5 = zext nneg i32 %add to i64
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %idxprom5
+  store i32 %conv, ptr %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP2]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INDVARS_IV]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  %3 = sext i1 %cmp1 to i64
+  %4 = add i64 %3, %indvars.iv
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_extra_select(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP2]], 0
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %2, 0
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %3 = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}

>From 2b594b41194cdb51748201d1dd9c2b59838bff88 Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Mon, 22 Apr 2024 12:06:19 +0000
Subject: [PATCH 2/2] Resolved remarks.

---
 llvm/lib/Analysis/IVDescriptors.cpp           |  8 +-
 .../test/Transforms/LoopVectorize/multicmp.ll | 89 +++++++++++++++++++
 2 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 811c4b75e07052..8838f992cb6d91 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -639,9 +639,11 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  // Find the compare instruction that is associated with OrigPhi, i.e. the
+  // recurrent reduction, and determine whether the multiple uses of the
+  // SelectInst and CmpInst are safe to vectorize.
   SelectInst *SI = dyn_cast<SelectInst>(I);
   Instruction *Cmp = nullptr;
-
   if (SI) {
     bool HasOrigPhiUser = false;
     bool SelectNonPHIUserInLoop = false;
@@ -653,6 +655,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       if (Inst == OrigPhi) {
         HasOrigPhiUser = true;
       } else {
+        // If we find a use of the SelectInst inside the loop then the reduction
+        // is no longer recurrent and it is not safe to proceed further.
         if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
             Blocks.end())
           SelectNonPHIUserInLoop = true;
@@ -683,6 +687,8 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       }
       if (!IsSafeCMP)
         Cmp = nullptr;
+    } else {
+      Cmp = nullptr;
     }
   }
 
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
index 30ef11b8b4b309..4933f08ed1185b 100644
--- a/llvm/test/Transforms/LoopVectorize/multicmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -1,6 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
 
+
+; int multi_user_cmp(float* a, int n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  for (int i = 0; i < n; i++) {
+;    if (a[i] < 0.0f) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
 define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
@@ -96,6 +109,18 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+;int multi_user_cmp_int(int* a, int n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  for (int i = 0; i < n; i++) {
+;    if (a[i] < 0) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
 define i32 @multi_user_cmp_int(ptr readonly %a, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_int(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
@@ -191,6 +216,21 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+;int multi_user_cmp_branch_use(float* a, int *b, int n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  for (int i = 0; i < n; i++) {
+;    _Bool c = a[i] < 0.0f;
+;    if (c) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;    if (c)
+;      b[i]++;
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
 define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
 ; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i32 noundef [[N:%.*]]) {
@@ -355,6 +395,22 @@ if.end6:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+;int multi_user_cmp_branch_use_and_outside_bb_use(float* a, int *b, int n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  _Bool c;
+;  for (int i = 0; i < n; i++) {
+;    c = a[i] < 0.0f;
+;    if (c) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;    if (c)
+;      b[i]++;
+;  }
+;  return all ? c : any ? 2 : 3;
+;}
 define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, ptr %b, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
 ; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i32 noundef [[N:%.*]]) {
@@ -523,6 +579,22 @@ for.inc:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+; Currently, this test-case is not supported.
+;int multi_user_cmp_fmax(float* a, int *b, int n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  float max = -INFINITY;
+;  for (int i = 0; i < n; i++) {
+;    _Bool c = a[i] > max;
+;    if (c) {
+;      max = a[i];
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
 define i32 @multi_user_cmp_fmax(ptr readonly %a, ptr readnone %b, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_fmax(
 ; CHECK-SAME: ptr readonly [[A:%.*]], ptr readnone [[B:%.*]], i32 noundef [[N:%.*]]) {
@@ -589,6 +661,21 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+; Currently, this test-case is not supported.
+;int multi_user_cmp_use_store_offset(float* a, int *b, int n) {
+;  _Bool any = 0;
+;  _Bool all = 1;
+;  for (int i = 0; i < n; i++) {
+;    _Bool c = a[i] < 0.0f;
+;    if (c) {
+;      any = 1;
+;    } else {
+;      all = 0;
+;    }
+;    b[i+c] = any;
+;  }
+;  return all ? 1 : any ? 2 : 3;
+;}
 define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_use_store_offset(
 ; CHECK-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i32 noundef [[N:%.*]]) {
@@ -663,6 +750,7 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+; Not vectorising: the compare instruction has a user, %3, inside the loop.
 define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_no_vectorise(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
@@ -729,6 +817,7 @@ for.body:
   br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
 }
 
+; Not vectorising: non-recurrent select instruction %3 inside the loop.
 define i32 @multi_user_cmp_extra_select(ptr readonly %a, i32 noundef %n) {
 ; CHECK-LABEL: define i32 @multi_user_cmp_extra_select(
 ; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {



More information about the llvm-commits mailing list