[llvm] r351926 - [IRCE] Support narrow latch condition for wide range checks

Max Kazantsev via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 22 23:20:56 PST 2019


Author: mkazantsev
Date: Tue Jan 22 23:20:56 2019
New Revision: 351926

URL: http://llvm.org/viewvc/llvm-project?rev=351926&view=rev
Log:
[IRCE] Support narrow latch condition for wide range checks

This patch relaxes the restriction on the types of the latch condition and the
range check. In the current implementation they must match; this patch allows
handling wide range checks against a narrower latch condition. The motivating
example is the following:

  int N = ...
  for (long i = 0; (int) i < N; i++) {
    if (i >= length) deopt;
  }
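
In LLVM IR this corresponds roughly to the following shape (a sketch that
mirrors the new tests below; %len stands in for the deopt bound and the
actual deoptimization plumbing is elided):

  loop:
    %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
    %rc = icmp slt i64 %iv, %len                ; wide (i64) range check
    br i1 %rc, label %backedge, label %deopt

  backedge:
    %iv.next = add i64 %iv, 1
    %narrow.iv = trunc i64 %iv.next to i32
    %latch.cond = icmp slt i32 %narrow.iv, %N   ; narrow (i32) latch condition
    br i1 %latch.cond, label %loop, label %exit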

In this patch, the option that enables this support is turned off by default;
it will be switched to true in a separate change.
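
To exercise the new behaviour, the flag can be passed to opt together with
the pass, as in the RUN lines of the new test (input.ll is a placeholder):

  opt -irce -irce-allow-narrow-latch=true -S < input.ll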

Differential Revision: https://reviews.llvm.org/D56837
Reviewed By: reames

Added:
    llvm/trunk/test/Transforms/IRCE/wide_indvar.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp?rev=351926&r1=351925&r2=351926&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp Tue Jan 22 23:20:56 2019
@@ -115,6 +115,11 @@ static cl::opt<bool> SkipProfitabilityCh
 static cl::opt<bool> AllowUnsignedLatchCondition("irce-allow-unsigned-latch",
                                                  cl::Hidden, cl::init(true));
 
+static cl::opt<bool> AllowNarrowLatchCondition(
+    "irce-allow-narrow-latch", cl::Hidden, cl::init(false),
+    cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
+             "with narrow latch condition."));
+
 static const char *ClonedLoopTag = "irce.loop.clone";
 
 #define DEBUG_TYPE "irce"
@@ -1044,11 +1049,23 @@ LoopStructure::parseLoopStructure(Scalar
   return Result;
 }
 
+/// If the type of \p S matches \p Ty, return \p S. Otherwise, return the
+/// signed or unsigned extension of \p S to type \p Ty.
+static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
+                                bool Signed) {
+  return Signed ? SE.getNoopOrSignExtend(S, Ty) : SE.getNoopOrZeroExtend(S, Ty);
+}
+
 Optional<LoopConstrainer::SubRanges>
 LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
   IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
 
-  if (Range.getType() != Ty)
+  auto *RTy = cast<IntegerType>(Range.getType());
+
+  // We only support wide range checks and narrow latches.
+  if (!AllowNarrowLatchCondition && RTy != Ty)
+    return None;
+  if (RTy->getBitWidth() < Ty->getBitWidth())
     return None;
 
   LoopConstrainer::SubRanges Result;
@@ -1056,8 +1073,10 @@ LoopConstrainer::calculateSubRanges(bool
   // I think we can be more aggressive here and make this nuw / nsw if the
   // addition that feeds into the icmp for the latch's terminating branch is nuw
   // / nsw.  In any case, a wrapping 2's complement addition is safe.
-  const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart);
-  const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
+  const SCEV *Start = NoopOrExtend(SE.getSCEV(MainLoopStructure.IndVarStart),
+                                   RTy, SE, IsSignedPredicate);
+  const SCEV *End = NoopOrExtend(SE.getSCEV(MainLoopStructure.LoopExitAt), RTy,
+                                 SE, IsSignedPredicate);
 
   bool Increasing = MainLoopStructure.IndVarIncreasing;
 
@@ -1067,7 +1086,7 @@ LoopConstrainer::calculateSubRanges(bool
 
   const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr;
 
-  const SCEV *One = SE.getOne(Ty);
+  const SCEV *One = SE.getOne(RTy);
   if (Increasing) {
     Smallest = Start;
     Greatest = End;
@@ -1256,6 +1275,13 @@ LoopConstrainer::RewrittenRangeInfo Loop
   bool IsSignedPredicate = LS.IsSignedPredicate;
 
   IRBuilder<> B(PreheaderJump);
+  auto *RangeTy = Range.getBegin()->getType();
+  auto NoopOrExt = [&](Value *V) {
+    if (V->getType() == RangeTy)
+      return V;
+    return IsSignedPredicate ? B.CreateSExt(V, RangeTy, "wide." + V->getName())
+                             : B.CreateZExt(V, RangeTy, "wide." + V->getName());
+  };
 
   // EnterLoopCond - is it okay to start executing this `LS'?
   Value *EnterLoopCond = nullptr;
@@ -1263,9 +1289,7 @@ LoopConstrainer::RewrittenRangeInfo Loop
       Increasing
           ? (IsSignedPredicate ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT)
           : (IsSignedPredicate ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
-  Value *IndVarStart = LS.IndVarStart;
-  Value *IndVarBase = LS.IndVarBase;
-  Value *LoopExitAt = LS.LoopExitAt;
+  Value *IndVarStart = NoopOrExt(LS.IndVarStart);
   EnterLoopCond = B.CreateICmp(Pred, IndVarStart, ExitSubloopAt);
 
   B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
@@ -1273,6 +1297,7 @@ LoopConstrainer::RewrittenRangeInfo Loop
 
   LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
   B.SetInsertPoint(LS.LatchBr);
+  Value *IndVarBase = NoopOrExt(LS.IndVarBase);
   Value *TakeBackedgeLoopCond = B.CreateICmp(Pred, IndVarBase, ExitSubloopAt);
 
   Value *CondForBranch = LS.LatchBrExitIdx == 1
@@ -1286,6 +1311,7 @@ LoopConstrainer::RewrittenRangeInfo Loop
   // IterationsLeft - are there any more iterations left, given the original
   // upper bound on the induction variable?  If not, we branch to the "real"
   // exit.
+  Value *LoopExitAt = NoopOrExt(LS.LoopExitAt);
   Value *IterationsLeft = B.CreateICmp(Pred, IndVarBase, LoopExitAt);
   B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
 
@@ -1394,7 +1420,7 @@ bool LoopConstrainer::run() {
   SubRanges SR = MaybeSR.getValue();
   bool Increasing = MainLoopStructure.IndVarIncreasing;
   IntegerType *IVTy =
-      cast<IntegerType>(MainLoopStructure.IndVarBase->getType());
+      cast<IntegerType>(Range.getBegin()->getType());
 
   SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
   Instruction *InsertPt = OriginalPreheader->getTerminator();
@@ -1557,6 +1583,12 @@ Optional<InductiveRangeCheck::Range>
 InductiveRangeCheck::computeSafeIterationSpace(
     ScalarEvolution &SE, const SCEVAddRecExpr *IndVar,
     bool IsLatchSigned) const {
+  // We can deal with the case where the types of the latch check and the range
+  // check don't match, provided the latch check is narrower.
+  auto *IVType = cast<IntegerType>(IndVar->getType());
+  auto *RCType = cast<IntegerType>(getBegin()->getType());
+  if (IVType->getBitWidth() > RCType->getBitWidth())
+    return None;
   // IndVar is of the form "A + B * I" (where "I" is the canonical induction
   // variable, that may or may not exist as a real llvm::Value in the loop) and
   // this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1580,8 +1612,9 @@ InductiveRangeCheck::computeSafeIteratio
   if (!IndVar->isAffine())
     return None;
 
-  const SCEV *A = IndVar->getStart();
-  const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE));
+  const SCEV *A = NoopOrExtend(IndVar->getStart(), RCType, SE, IsLatchSigned);
+  const SCEVConstant *B = dyn_cast<SCEVConstant>(
+      NoopOrExtend(IndVar->getStepRecurrence(SE), RCType, SE, IsLatchSigned));
   if (!B)
     return None;
   assert(!B->isZero() && "Recurrence with zero step?");
@@ -1592,7 +1625,7 @@ InductiveRangeCheck::computeSafeIteratio
     return None;
 
   assert(!D->getValue()->isZero() && "Recurrence with zero step?");
-  unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+  unsigned BitWidth = RCType->getBitWidth();
   const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
 
   // Subtract Y from X so that it does not go through border of the IV

Added: llvm/trunk/test/Transforms/IRCE/wide_indvar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IRCE/wide_indvar.ll?rev=351926&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IRCE/wide_indvar.ll (added)
+++ llvm/trunk/test/Transforms/IRCE/wide_indvar.ll Tue Jan 22 23:20:56 2019
@@ -0,0 +1,459 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -irce-allow-narrow-latch=true -S < %s 2>&1 | FileCheck %s
+; RUN: opt -verify-loop-info -irce-print-changed-loops -passes='require<branch-prob>,loop(irce)' -irce-allow-narrow-latch=true -S < %s 2>&1 | FileCheck %s
+
+; Check that we can remove a trivially non-failing range check.
+define i32 @test_increasing_slt_slt_wide_simple_no_postloop() {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_simple_no_postloop(
+; CHECK-NOT:   preloop
+; CHECK-NOT:   postloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp slt i64 %iv, 100
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp slt i32 %narrow.iv, 100
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; This range check fails on the last iteration, so it needs a postloop.
+define i32 @test_increasing_slt_slt_wide_simple_postloop() {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_simple_postloop(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp slt i64 %wide.narrow.iv, 99
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp slt i64 %iv, 99
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp slt i32 %narrow.iv, 100
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; General case. If both %N and %M are non-negative, we do not need a preloop.
+define i32 @test_increasing_slt_slt_wide_non-negative(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_non-negative(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp slt i64 %wide.narrow.iv, %exit.mainloop.at
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M = load i64, i64* %m_ptr, !range !1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp slt i64 %iv, %M
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp slt i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; General case. Even though %M may be negative, we do not need a preloop because
+; we emit a runtime non-negativity check against %M and do not enter the main
+; loop if %M is negative.
+define i32 @test_increasing_slt_slt_wide_general(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_general(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp slt i64
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M = load i64, i64* %m_ptr
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp slt i64 %iv, %M
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp slt i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; General case with preloop.
+define i32 @test_increasing_slt_slt_wide_general_preloop(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_general_preloop(
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp slt i64
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       preloop
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M = load i64, i64* %m_ptr
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp slt i64 %iv, %M
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv to i32
+  %latch.cond = icmp slt i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; Same as above, multiple checks.
+define i32 @test_increasing_slt_slt_wide_multiple_checks(i32* %n_ptr, i64* %m1_ptr, i64* %m2_ptr, i64* %m3_ptr, i64* %m4_ptr) {
+; CHECK-LABEL: @test_increasing_slt_slt_wide_multiple_checks(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       %c1 = and i1 true, true
+; CHECK:       %c2 = and i1 %c1, true
+; CHECK:       %rc = and i1 %c2, true
+; CHECK:       br i1 %rc, label %backedge, label %check_failed.loopexit
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp slt i64
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M1 = load i64, i64* %m1_ptr
+  %M2 = load i64, i64* %m2_ptr
+  %M3 = load i64, i64* %m3_ptr
+  %M4 = load i64, i64* %m4_ptr
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc1 = icmp slt i64 %iv, %M1
+  %rc2 = icmp slt i64 %iv, %M2
+  %rc3 = icmp slt i64 %iv, %M3
+  %rc4 = icmp slt i64 %iv, %M4
+  %c1 = and i1 %rc1, %rc2
+  %c2 = and i1 %c1, %rc3
+  %rc = and i1 %c2, %rc4
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp slt i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; Wide IV against narrow range check. We don't currently support it.
+define i32 @test_increasing_slt_slt_wide_simple_negtest_narrow_rc() {
+
+; CHECK-LABEL: @test_increasing_slt_slt_wide_simple_negtest_narrow_rc(
+; CHECK-NOT:   i1 true
+; CHECK-NOT:   main
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %narrow.iv = trunc i64 %iv to i32
+  %rc = icmp slt i32 %narrow.iv, 101
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %latch.cond = icmp slt i64 %iv, 100
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; Check that we can remove a trivially non-failing range check.
+define i32 @test_increasing_ult_ult_wide_simple_no_postloop() {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_simple_no_postloop(
+; CHECK-NOT:   preloop
+; CHECK-NOT:   postloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp ult i64 %iv, 100
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp ult i32 %narrow.iv, 100
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; This range check fails on the last iteration, so it needs a postloop.
+define i32 @test_increasing_ult_ult_wide_simple_postloop() {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_simple_postloop(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp ult i64 %wide.narrow.iv, 99
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp ult i64 %iv, 99
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp ult i32 %narrow.iv, 100
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; General case. If both %N and %M are non-negative, we do not need a preloop.
+define i32 @test_increasing_ult_ult_wide_non-negative(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_non-negative(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp ult i64 %wide.narrow.iv, %exit.mainloop.at
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M = load i64, i64* %m_ptr, !range !1
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp ult i64 %iv, %M
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp ult i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; General case. Even though %M may be negative, we do not need a preloop because
+; we emit a runtime non-negativity check against %M and do not enter the main
+; loop if %M is negative.
+define i32 @test_increasing_ult_ult_wide_general(i32* %n_ptr, i64* %m_ptr) {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_general(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       br i1 true, label %backedge, label %check_failed
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp ult i64
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M = load i64, i64* %m_ptr
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc = icmp ult i64 %iv, %M
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp ult i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; Same as above, multiple checks.
+define i32 @test_increasing_ult_ult_wide_multiple_checks(i32* %n_ptr, i64* %m1_ptr, i64* %m2_ptr, i64* %m3_ptr, i64* %m4_ptr) {
+; CHECK-LABEL: @test_increasing_ult_ult_wide_multiple_checks(
+; CHECK-NOT:   preloop
+; CHECK:       loop:
+; CHECK:       %c1 = and i1 true, true
+; CHECK:       %c2 = and i1 %c1, true
+; CHECK:       %rc = and i1 %c2, true
+; CHECK:       br i1 %rc, label %backedge, label %check_failed.loopexit
+; CHECK:       backedge
+; CHECK:       [[COND:%[^ ]+]] = icmp ult i64
+; CHECK:       br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK:       postloop
+
+entry:
+  %N = load i32, i32* %n_ptr, !range !2
+  %M1 = load i64, i64* %m1_ptr
+  %M2 = load i64, i64* %m2_ptr
+  %M3 = load i64, i64* %m3_ptr
+  %M4 = load i64, i64* %m4_ptr
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %rc1 = icmp ult i64 %iv, %M1
+  %rc2 = icmp ult i64 %iv, %M2
+  %rc3 = icmp ult i64 %iv, %M3
+  %rc4 = icmp ult i64 %iv, %M4
+  %c1 = and i1 %rc1, %rc2
+  %c2 = and i1 %c1, %rc3
+  %rc = and i1 %c2, %rc4
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %narrow.iv = trunc i64 %iv.next to i32
+  %latch.cond = icmp ult i32 %narrow.iv, %N
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+; Wide IV against narrow range check. We don't currently support it.
+define i32 @test_increasing_ult_ult_wide_simple_negtest_narrow_rc() {
+
+; CHECK-LABEL: @test_increasing_ult_ult_wide_simple_negtest_narrow_rc(
+; CHECK-NOT:   i1 true
+; CHECK-NOT:   main
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %backedge ]
+  %narrow.iv = trunc i64 %iv to i32
+  %rc = icmp ult i32 %narrow.iv, 101
+  br i1 %rc, label %backedge, label %check_failed
+
+backedge:
+  %iv.next = add i64 %iv, 1
+  %latch.cond = icmp ult i64 %iv, 100
+  br i1 %latch.cond, label %loop, label %exit
+
+exit:
+  ret i32 %narrow.iv
+
+check_failed:
+  ret i32 -1
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{i64 0, i64 9223372036854775807}
+!2 = !{i32 1, i32 2147483647}



