[llvm] [LoopVectorize] LLVM fails to vectorise loops with multi-bool variables (PR #89226)

Dinar Temirbulatov via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 18 05:31:41 PDT 2024


https://github.com/dtemirbulatov created https://github.com/llvm/llvm-project/pull/89226

This patch allows compare instructions with multiple uses, both inside and outside the loop, to be considered for vectorisation: if we can prove that every user of the compare instruction is a recurrent reduction, a branch, or an instruction outside the loop, then it is safe to vectorise.

This change allows this loop to be vectorised:
int foo(float* a, int n) {
  _Bool any = 0;
  _Bool all = 1;
  for (int i = 0; i < n; i++) {
    if (a[i] < 0.0f) {
      any = 1;
    } else {
      all = 0;
    }
  }
  return all ? 1 : any ? 2 : 3;
}
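
In IR, the two boolean flags become two any-of select reductions that share a single fcmp, which the single-use restriction in RecurrenceDescriptor::isAnyOfPattern previously rejected. A reduced sketch of the loop body, condensed from the multi_user_cmp test added below (value names shortened for readability), looks like this:

for.body:
  %all = phi i1 [ true, %preheader ], [ %all.next, %for.body ]
  %any = phi i1 [ false, %preheader ], [ %any.next, %for.body ]
  %x = load float, ptr %arrayidx, align 4
  ; One compare with two select users, each feeding its own reduction phi.
  %cmp = fcmp olt float %x, 0.000000e+00
  %any.next = select i1 %cmp, i1 true, i1 %any
  %all.next = select i1 %cmp, i1 %all, i1 false

With this patch, AddReductionVar records the multi-use compare in the recurrence descriptor, and LoopVectorizationLegality::canVectorizeInstrs only accepts the loop if every in-loop select user of that compare belongs to a reduction.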

From 6bbc7ec56c5afa1a25c8b871508cf3c52e8768de Mon Sep 17 00:00:00 2001
From: Dinar Temirbulatov <Dinar.Temirbulatov at arm.com>
Date: Thu, 18 Apr 2024 11:03:44 +0000
Subject: [PATCH] [LoopVectorize] LLVM fails to vectorise loops with multiple
 bool variables

This patch allows compare instructions with multiple uses, both inside and
outside the loop, to be considered for vectorisation: if we can prove that
every user of the compare instruction is a recurrent reduction, a branch, or
an instruction outside the loop, then it is safe to vectorise.

This change allows this loop to be vectorised:
int foo(float* a, int n) {
  _Bool any = 0;
  _Bool all = 1;
  for (int i = 0; i < n; i++) {
    if (a[i] < 0.0f) {
      any = 1;
    } else {
      all = 0;
    }
  }
  return all ? 1 : any ? 2 : 3;
}
---
 llvm/include/llvm/Analysis/IVDescriptors.h    |  20 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |  65 +-
 .../Vectorize/LoopVectorizationLegality.cpp   |  25 +
 .../test/Transforms/LoopVectorize/multicmp.ll | 794 ++++++++++++++++++
 4 files changed, 891 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/multicmp.ll

diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 5c7b613ac48c40..f18ab500c4d9fa 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -76,11 +76,11 @@ class RecurrenceDescriptor {
                        RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
                        Type *RT, bool Signed, bool Ordered,
                        SmallPtrSetImpl<Instruction *> &CI,
-                       unsigned MinWidthCastToRecurTy)
+                       unsigned MinWidthCastToRecurTy, Instruction *Cmp)
       : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
         Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
         IsSigned(Signed), IsOrdered(Ordered),
-        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
+        MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), MultiCmp(Cmp) {
     CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -88,12 +88,13 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
     InstDesc(bool IsRecur, Instruction *I, Instruction *ExactFP = nullptr)
-        : IsRecurrence(IsRecur), PatternLastInst(I),
-          RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
+        : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None),
+          ExactFPMathInst(ExactFP), Cmp(nullptr) {}
 
-    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
+    InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr,
+             Instruction *MultiCmp = nullptr)
         : IsRecurrence(true), PatternLastInst(I), RecKind(K),
-          ExactFPMathInst(ExactFP) {}
+          ExactFPMathInst(ExactFP), Cmp(MultiCmp) {}
 
     bool isRecurrence() const { return IsRecurrence; }
 
@@ -105,6 +106,8 @@ class RecurrenceDescriptor {
 
     Instruction *getPatternInst() const { return PatternLastInst; }
 
+    Instruction *getMultiCmp() const { return Cmp; }
+
   private:
     // Is this instruction a recurrence candidate.
     bool IsRecurrence;
@@ -115,6 +118,8 @@ class RecurrenceDescriptor {
     RecurKind RecKind;
     // Recurrence does not allow floating-point reassociation.
     Instruction *ExactFPMathInst;
+    // Multi-user compare instruction.
+    Instruction *Cmp;
   };
 
   /// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -270,6 +275,8 @@ class RecurrenceDescriptor {
            cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
   }
 
+  Instruction *getMultiCmp() const { return MultiCmp; }
+
   /// Reductions may store temporary or final result to an invariant address.
   /// If there is such a store in the loop then, after successfull run of
   /// AddReductionVar method, this field will be assigned the last met store.
@@ -300,6 +307,7 @@ class RecurrenceDescriptor {
   SmallPtrSet<Instruction *, 8> CastInsts;
   // The minimum width used by the recurrence.
   unsigned MinWidthCastToRecurrenceType;
+  Instruction *MultiCmp = nullptr;
 };
 
 /// A struct for saving information about induction variables.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 055f121e743411..811c4b75e07052 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -256,6 +256,7 @@ bool RecurrenceDescriptor::AddReductionVar(
   SmallPtrSet<Instruction *, 4> CastInsts;
   unsigned MinWidthCastToRecurrenceType;
   Instruction *Start = Phi;
+  Instruction *MultiCMP = nullptr;
   bool IsSigned = false;
 
   SmallPtrSet<Instruction *, 8> VisitedInsts;
@@ -400,6 +401,8 @@ bool RecurrenceDescriptor::AddReductionVar(
     }
 
     bool IsASelect = isa<SelectInst>(Cur);
+    if (IsASelect)
+      MultiCMP = ReduxDesc.getMultiCmp();
 
     // A conditional reduction operation must only have 2 or less uses in
     // VisitedInsts.
@@ -597,7 +600,8 @@ bool RecurrenceDescriptor::AddReductionVar(
   // Save the description of this reduction variable.
   RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
                           FMF, ExactFPMathInst, RecurrenceType, IsSigned,
-                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
+                          IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
+                          MultiCMP);
   RedDes = RD;
 
   return true;
@@ -635,14 +639,59 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
       return InstDesc(Select, Prev.getRecKind());
   }
 
+  SelectInst *SI = dyn_cast<SelectInst>(I);
+  Instruction *Cmp = nullptr;
+
+  if (SI) {
+    bool HasOrigPhiUser = false;
+    bool SelectNonPHIUserInLoop = false;
+    auto Blocks = Loop->getBlocksVector();
+    for (User *U : SI->users()) {
+      Instruction *Inst = dyn_cast<Instruction>(U);
+      if (!Inst)
+        continue;
+      if (Inst == OrigPhi) {
+        HasOrigPhiUser = true;
+      } else {
+        if (std::find(Blocks.begin(), Blocks.end(), Inst->getParent()) !=
+            Blocks.end())
+          SelectNonPHIUserInLoop = true;
+      }
+    }
+    Cmp = dyn_cast<CmpInst>(SI->getOperand(0));
+    if (Cmp && !Cmp->hasOneUse() && HasOrigPhiUser && !SelectNonPHIUserInLoop) {
+      bool IsSafeCMP = true;
+      for (User *U : Cmp->users()) {
+        Instruction *UInst = dyn_cast<Instruction>(U);
+        if (!UInst)
+          continue;
+        if (SelectInst *SI1 = dyn_cast<SelectInst>(U)) {
+          if (!llvm::all_of(SI1->users(), [Blocks](User *USI) {
+                Instruction *Inst1 = dyn_cast<Instruction>(USI);
+                if (!Inst1 || (std::find(Blocks.begin(), Blocks.end(),
+                                         Inst1->getParent()) == Blocks.end() ||
+                               isa<PHINode>(Inst1)))
+                  return true;
+                return false;
+              }))
+            IsSafeCMP = false;
+        }
+        if (IsSafeCMP && !isa<BranchInst>(UInst) && !isa<SelectInst>(UInst) &&
+            std::find(Blocks.begin(), Blocks.end(), UInst->getParent()) !=
+                Blocks.end())
+          IsSafeCMP = false;
+      }
+      if (!IsSafeCMP)
+        Cmp = nullptr;
+    }
+  }
+
   // Only match select with single use cmp condition.
-  if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
-                         m_Value())))
+  if (!Cmp && !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())),
+                                 m_Value(), m_Value())))
     return InstDesc(false, I);
 
-  SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
-
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
@@ -656,8 +705,10 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
   if (!Loop->isLoopInvariant(NonPhi))
     return InstDesc(false, I);
 
-  return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
-                                                     : RecurKind::FAnyOf);
+  return InstDesc(I,
+                  isa<ICmpInst>(I->getOperand(0)) ? RecurKind::IAnyOf
+                                                  : RecurKind::FAnyOf,
+                  nullptr, Cmp);
 }
 
 RecurrenceDescriptor::InstDesc
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index d33743e74cbe31..257be42be0d4f8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -787,6 +787,7 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
 
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *Header = TheLoop->getHeader();
+  DenseMap<Instruction *, unsigned> MultiCmpsRed;
 
   // For each block in the loop.
   for (BasicBlock *BB : TheLoop->blocks()) {
@@ -830,6 +831,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
           AllowedExit.insert(RedDes.getLoopExitInstr());
           Reductions[Phi] = RedDes;
+          Instruction *Cmp = RedDes.getMultiCmp();
+          if (Cmp) {
+            if (MultiCmpsRed.contains(Cmp))
+              MultiCmpsRed[Cmp]++;
+            else
+              MultiCmpsRed[Cmp] = 1;
+          }
           continue;
         }
 
@@ -1045,6 +1053,23 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
+  // Make sure all in-loop users of the compare instruction are recurrent.
+  if (MultiCmpsRed.size() > 0) {
+    auto Blocks = TheLoop->getBlocksVector();
+    for (auto const &C : MultiCmpsRed) {
+      Instruction *Cmp = C.first;
+      unsigned Counter = 0;
+      for (User *U : Cmp->users()) {
+        SelectInst *Inst = dyn_cast<SelectInst>(U);
+        if (Inst && std::find(Blocks.begin(), Blocks.end(),
+                              Inst->getParent()) != Blocks.end())
+          Counter++;
+      }
+      if (Counter != C.second)
+        return false;
+    }
+  }
+
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.
diff --git a/llvm/test/Transforms/LoopVectorize/multicmp.ll b/llvm/test/Transforms/LoopVectorize/multicmp.ll
new file mode 100644
index 00000000000000..30ef11b8b4b309
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/multicmp.ll
@@ -0,0 +1,794 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK
+
+define i32 @multi_user_cmp(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP11]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_int(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI1]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP2:%.*]] = icmp ne <4 x i1> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP2]])
+; CHECK-NEXT:    [[RDX_SELECT3:%.*]] = select i1 [[TMP8]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX4:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT3]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP9]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX4]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 0
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %2, 0
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP17:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP17]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI2]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP31:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP31]], i1 true, i1 false
+; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP32]], i1 false, i1 true
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[IF_END6:%.*]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP34:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP33]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP34]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ANY_0_OFF020:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ALL_0_OFF018:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[IF_END6]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP35]], 0.000000e+00
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF018]], i1 false
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF020]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP36]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[IF_END6]]
+; CHECK:       if.end6:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+;
+entry:
+  %cmp17 = icmp sgt i32 %n, 0
+  br i1 %cmp17, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %if.end6 ]
+  %any.0.off020 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %if.end6 ]
+  %all.0.off018 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %if.end6 ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off018, i1 false
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off020
+  br i1 %cmp1, label %if.then3, label %if.end6
+
+if.then3:
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %3 = load i32, ptr %arrayidx5, align 4
+  %inc = add nsw i32 %3, 1
+  store i32 %inc, ptr %arrayidx5, align 4
+  br label %if.end6
+
+if.end6:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, ptr %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP20:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i1> [ <i1 true, i1 true, i1 true, i1 true>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META13:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[VEC_PHI2]]
+; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], 1
+; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP8]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK:       pred.store.if3:
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw i32 [[TMP15]], 1
+; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP14]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
+; CHECK:       pred.store.continue4:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK:       pred.store.if5:
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
+; CHECK:       pred.store.continue6:
+; CHECK-NEXT:    [[TMP23:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP21]], [[PRED_STORE_IF5]] ]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1
+; CHECK-NEXT:    store i32 [[TMP28]], ptr [[TMP26]], align 4, !alias.scope [[META16]], !noalias [[META13]]
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_STORE_CONTINUE6]] ], [ [[TMP27]], [[PRED_STORE_IF7]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP32:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP32]], i1 false, i1 true
+; CHECK-NEXT:    [[RDX_SELECT_CMP9:%.*]] = icmp ne <4 x i1> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP33:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP9]])
+; CHECK-NEXT:    [[RDX_SELECT10:%.*]] = select i1 [[TMP33]], i1 true, i1 false
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i1 [ true, [[VECTOR_MEMCHECK]] ], [ true, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi i1 [ false, [[VECTOR_MEMCHECK]] ], [ false, [[FOR_BODY_PREHEADER]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1:%.*]], [[FOR_INC:%.*]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_INC]] ], [ [[RDX_SELECT10]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[TMP34:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-NEXT:    [[TMP35:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP34]], i32 [[TMP35]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP36]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ALL_0_OFF022:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ANY_0_OFF021:%.*]] = phi i1 [ [[BC_MERGE_RDX11]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP37:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1]] = fcmp olt float [[TMP37]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF021]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF022]], i1 false
+; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[FOR_INC]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP38]], 1
+; CHECK-NEXT:    store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+;
+entry:
+  %cmp20 = icmp sgt i32 %n, 0
+  br i1 %cmp20, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = zext i1 %cmp1 to i32
+  %1 = select i1 %.any.0.off0, i32 2, i32 3
+  %2 = select i1 %all.0.off0., i32 %0, i32 %1
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 0, %entry ], [ %2, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
+  %all.0.off022 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.inc ]
+  %any.0.off021 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.inc ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %3 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %3, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off021
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off022, i1 false
+  br i1 %cmp1, label %if.then3, label %for.inc
+
+if.then3:
+  %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %4 = load i32, ptr %arrayidx5, align 4
+  %inc = add nsw i32 %4, 1
+  store i32 %inc, ptr %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_fmax(ptr readonly %a, ptr readnone %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr readnone [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP13]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF017:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[FOR_BODY_PREHEADER]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF014:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[TMP2]], [[MAX_015]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF014]], i1 false
+; CHECK-NEXT:    [[DOTMAX_0]] = select i1 [[CMP1]], float [[TMP2]], float [[MAX_015]]
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF017]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp13 = icmp sgt i32 %n, 0
+  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %any.0.off017 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %max.015 = phi float [ 0xFFF0000000000000, %for.body.preheader ], [ %.max.0, %for.body ]
+  %all.0.off014 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp ogt float %2, %max.015
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off014, i1 false
+  %.max.0 = select i1 %cmp1, float %2, float %max.015
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off017
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP19:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF022:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF020:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP2]], 0.000000e+00
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF020]], i1 false
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF022]]
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[DOTANY_0_OFF0]] to i32
+; CHECK-NEXT:    [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N]]
+; CHECK-NEXT:    [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp19 = icmp sgt i32 %n, 0
+  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %any.0.off022 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %all.0.off020 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off020, i1 false
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off022
+  %conv = zext i1 %.any.0.off0 to i32
+  %conv4 = zext i1 %cmp1 to i32
+  %add = add nuw nsw i32 %conv4, %n
+  %idxprom5 = zext nneg i32 %add to i64
+  %arrayidx6 = getelementptr inbounds i32, ptr %b, i64 %idxprom5
+  store i32 %conv, ptr %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[TMP2]], 0.000000e+00
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INDVARS_IV]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
+  %2 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp olt float %2, 0.000000e+00
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  %3 = sext i1 %cmp1 to i64
+  %4 = add i64 %3, %indvars.iv
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+define i32 @multi_user_cmp_extra_select(ptr readonly %a, i32 noundef %n) {
+; CHECK-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-SAME: ptr readonly [[A:%.*]], i32 noundef [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ALL_0_OFF0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP1]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    ret i32 [[ALL_0_OFF0_LCSSA]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[FOR_BODY_PREHEADER]] ], [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[FOR_BODY_PREHEADER]] ], [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP2]], 0
+; CHECK-NEXT:    [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-NEXT:    [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
+;
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  %0 = select i1 %.any.0.off0, i32 2, i32 3
+  %1 = select i1 %all.0.off0., i32 1, i32 %0
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %all.0.off0.lcssa = phi i32 [ 1, %entry ], [ %1, %for.cond.cleanup.loopexit ]
+  ret i32 %all.0.off0.lcssa
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %all.0.off010 = phi i1 [ true, %for.body.preheader ], [ %all.0.off0., %for.body ]
+  %any.0.off09 = phi i1 [ false, %for.body.preheader ], [ %.any.0.off0, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %2, 0
+  %.any.0.off0 = select i1 %cmp1, i1 true, i1 %any.0.off09
+  %all.0.off0. = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %3 = select i1 %cmp1, i1 %all.0.off010, i1 false
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}


