[llvm] r364733 - [ARM] WLS/LE Code Generation

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 01:21:28 PDT 2019


Author: sam_parker
Date: Mon Jul  1 01:21:28 2019
New Revision: 364733

URL: http://llvm.org/viewvc/llvm-project?rev=364733&view=rev
Log:
[ARM] WLS/LE Code Generation
    
Backend changes to enable WLS/LE low-overhead loops for armv8.1-m:
1) Use TTI to communicate to the HardwareLoop pass that we should try
   to generate intrinsics that guard the loop entry, as well as setting
   the loop trip count.
2) Lower the BRCOND that uses said intrinsic to an Arm specific node:
   ARMWLS.
3) ISelDAGToDAG the node to a new pseudo instruction:
   t2WhileLoopStart.
4) Add support in ARMLowOverheadLoops to handle the new pseudo
   instruction.

Differential Revision: https://reviews.llvm.org/D63816

Added:
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir
    llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir
Removed:
    llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir
Modified:
    llvm/trunk/lib/CodeGen/HardwareLoops.cpp
    llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
    llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
    llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll
    llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
    llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll
    llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll

Modified: llvm/trunk/lib/CodeGen/HardwareLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/HardwareLoops.cpp?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/HardwareLoops.cpp (original)
+++ llvm/trunk/lib/CodeGen/HardwareLoops.cpp Mon Jul  1 01:21:28 2019
@@ -294,6 +294,7 @@ static bool CanGenerateTest(Loop *L, Val
   // Check that the icmp is checking for equality of Count and zero and that
   // a non-zero value results in entering the loop.
   auto ICmp = cast<ICmpInst>(BI->getCondition());
+  LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
   if (!ICmp->isEquality())
     return false;
 

Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon Jul  1 01:21:28 2019
@@ -2998,6 +2998,16 @@ void ARMDAGToDAGISel::Select(SDNode *N)
     // Other cases are autogenerated.
     break;
   }
+  case ARMISD::WLS: {
+    SDValue Ops[] = { N->getOperand(1),   // Loop count
+                      N->getOperand(2),   // Exit target
+                      N->getOperand(0) };
+    SDNode *LoopStart =
+      CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
+    ReplaceUses(N, LoopStart);
+    CurDAG->RemoveDeadNode(N);
+    return;
+  }
   case ARMISD::BRCOND: {
     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Jul  1 01:21:28 2019
@@ -633,6 +633,10 @@ ARMTargetLowering::ARMTargetLowering(con
   if (Subtarget->hasMVEIntegerOps())
     addMVEVectorTypes(Subtarget->hasMVEFloatOps());
 
+  // Combine low-overhead loop intrinsics so that we can lower i1 types.
+  if (Subtarget->hasLOB())
+    setTargetDAGCombine(ISD::BRCOND);
+
   if (Subtarget->hasNEON()) {
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);
@@ -1542,6 +1546,7 @@ const char *ARMTargetLowering::getTarget
   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
+  case ARMISD::WLS:           return "ARMISD::WLS";
   }
   return nullptr;
 }
@@ -12883,6 +12888,42 @@ SDValue ARMTargetLowering::PerformCMOVTo
   return V;
 }
 
+/// Combine (brcond (xor|setcc test.set.loop.iterations, 1), dest) into an
+/// ARMISD::WLS node whose operands are the chain, the intrinsic's count
+/// operand and the branch destination. Returns an empty SDValue on no match.
+static SDValue PerformHWLoopCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const ARMSubtarget *ST) {
+  // Look for (brcond (xor test.set.loop.iterations, -1))
+  SDValue CC = N->getOperand(1);
+  if (CC->getOpcode() != ISD::XOR && CC->getOpcode() != ISD::SETCC)
+    return SDValue();
+  if (CC->getOperand(0)->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return SDValue();
+
+  SDValue Int = CC->getOperand(0);
+  unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
+  if (IntOp != Intrinsic::test_set_loop_iterations)
+    return SDValue();
+
+  // The operand must be a constant 1; verify the type before querying it.
+  assert(isa<ConstantSDNode>(CC->getOperand(1)) &&
+         cast<ConstantSDNode>(CC->getOperand(1))->isOne() &&
+         "Expected to compare against 1");
+
+  SDLoc dl(Int);
+  SDValue Chain = N->getOperand(0);
+  SDValue Elements = Int.getOperand(2);
+  SDValue ExitBlock = N->getOperand(2);
+
+  // TODO: Once we start supporting tail predication, we can add another
+  // operand to WLS for the number of elements processed in a vector loop.
+  SDValue Ops[] = { Chain, Elements, ExitBlock };
+  SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+  DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
+  return Res;
+}
+
 /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
 SDValue
 ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
@@ -13114,6 +13155,7 @@ SDValue ARMTargetLowering::PerformDAGCom
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
   case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
   case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
+  case ISD::BRCOND:     return PerformHWLoopCombine(N, DCI, Subtarget);
   case ARMISD::ADDC:
   case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI, Subtarget);
   case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI, Subtarget);

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Jul  1 01:21:28 2019
@@ -125,6 +125,8 @@ class VectorType;
       WIN__CHKSTK,  // Windows' __chkstk call to do stack probing.
       WIN__DBZCHK,  // Windows' divide by zero check
 
+      WLS,          // Low-overhead loops, While Loop Start
+
       VCEQ,         // Vector compare equal.
       VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.

Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Mon Jul  1 01:21:28 2019
@@ -106,6 +106,11 @@ def SDT_ARMIntShiftParts : SDTypeProfile
                                               SDTCisInt<0>,
                                               SDTCisInt<4>]>;
 
+// TODO Add another operand for 'Size' so that we can re-use this node when we
+// start supporting *TP versions.
+def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
+                                            SDTCisVT<1, OtherVT>]>;
+
 def ARMSmlald        : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
 def ARMSmlaldx       : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
 def ARMSmlsld        : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
@@ -244,6 +249,9 @@ def SDTARMVGETLN  : SDTypeProfile<1, 2,
 def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
 def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 
+def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
+                    [SDNPHasChain]>;
+
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
 

Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Mon Jul  1 01:21:28 2019
@@ -5216,11 +5216,19 @@ def t2LoopDec :
   t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
                4, IIC_Br, []>, Sched<[WriteBr]>;
 
-let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in
+let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in {
+def t2WhileLoopStart :
+    t2PseudoInst<(outs),
+                 (ins rGPR:$elts, brtarget:$target),
+                 4, IIC_Br, []>,
+                 Sched<[WriteBr]>;
+
 def t2LoopEnd :
   t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
   8, IIC_Br, []>, Sched<[WriteBr]>;
 
+} // end isBranch, isTerminator, hasSideEffects
+
 } // end isNotDuplicable
 
 class CS<string iname, bits<4> opcode, list<dag> pattern=[]>

Modified: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp Mon Jul  1 01:21:28 2019
@@ -105,15 +105,20 @@ bool ARMLowOverheadLoops::ProcessLoop(Ma
   LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
 
   auto IsLoopStart = [](MachineInstr &MI) {
-    return MI.getOpcode() == ARM::t2DoLoopStart;
+    return MI.getOpcode() == ARM::t2DoLoopStart ||
+           MI.getOpcode() == ARM::t2WhileLoopStart;
   };
 
-  auto SearchForStart =
-    [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr* {
+  // Search the given block for a loop start instruction. If one isn't found,
+  // and there's only one predecessor block, search that one too.
+  std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
+    [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
     for (auto &MI : *MBB) {
       if (IsLoopStart(MI))
         return &MI;
     }
+    if (MBB->pred_size() == 1)
+      return SearchForStart(*MBB->pred_begin());
     return nullptr;
   };
 
@@ -122,8 +127,28 @@ bool ARMLowOverheadLoops::ProcessLoop(Ma
   MachineInstr *End = nullptr;
   bool Revert = false;
 
-  if (auto *Preheader = ML->getLoopPreheader())
+  // Search the preheader for the start intrinsic, or look through the
+  // predecessors of the header to find exactly one set.iterations intrinsic.
+  // FIXME: I don't see why we shouldn't be supporting multiple predecessors
+  // with potentially multiple set.loop.iterations, so we need to enable this.
+  if (auto *Preheader = ML->getLoopPreheader()) {
     Start = SearchForStart(Preheader);
+  } else {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
+               << " - Performing manual predecessor search.\n");
+    MachineBasicBlock *Pred = nullptr;
+    for (auto *MBB : ML->getHeader()->predecessors()) {
+      if (!ML->contains(MBB)) {
+        if (Pred) {
+          LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
+          Start = nullptr;
+          break;
+        }
+        Pred = MBB;
+        Start = SearchForStart(MBB);
+      }
+    }
+  }
 
   // Find the low-overhead loop components and decide whether or not to fall
   // back to a normal loop.
@@ -158,12 +183,11 @@ bool ARMLowOverheadLoops::ProcessLoop(Ma
       break;
   }
 
-  if (Start || Dec || End) {
-    if (!Start || !Dec || !End)
-      report_fatal_error("Failed to find all loop components");
-  } else {
+  if (!Start && !Dec && !End) {
     LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
     return Changed;
+  } if (!(Start && Dec && End)) {
+    report_fatal_error("Failed to find all loop components");
   }
 
   if (!End->getOperand(1).isMBB() ||
@@ -212,15 +236,21 @@ void ARMLowOverheadLoops::Expand(Machine
       break;
     }
 
+    unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
+      ARM::t2DLS : ARM::t2WLS;
     MachineInstrBuilder MIB =
-      BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(ARM::t2DLS));
-    if (InsertPt != Start)
-      InsertPt->eraseFromParent();
+      BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
 
     MIB.addDef(ARM::LR);
     MIB.add(Start->getOperand(0));
-    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB);
+    if (Opc == ARM::t2WLS)
+      MIB.add(Start->getOperand(1));
+
+    if (InsertPt != Start)
+      InsertPt->eraseFromParent();
     Start->eraseFromParent();
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
+    return &*MIB;
   };
 
   // Combine the LoopDec and LoopEnd instructions into LE(TP).
@@ -234,24 +264,15 @@ void ARMLowOverheadLoops::Expand(Machine
     MIB.add(End->getOperand(1));
     LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
 
-    // If there is a branch after loop end, which branches to the fallthrough
-    // block, remove the branch.
-    MachineBasicBlock *Latch = End->getParent();
-    MachineInstr *Terminator = &Latch->instr_back();
-    if (End != Terminator) {
-      MachineBasicBlock *Exit = ML->getExitBlock();
-      if (Latch->isLayoutSuccessor(Exit)) {
-        LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: "
-                   << *Terminator);
-        Terminator->eraseFromParent();
-      }
-    }
     End->eraseFromParent();
     Dec->eraseFromParent();
+    return &*MIB;
   };
 
   // Generate a subs, or sub and cmp, and a branch instead of an LE.
   // TODO: Check flags so that we can possibly generate a subs.
+  // FIXME: Need to check that we're not trashing the CPSR when generating
+  // the cmp.
   auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) {
     LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
     // Create sub
@@ -282,12 +303,53 @@ void ARMLowOverheadLoops::Expand(Machine
     Dec->eraseFromParent();
   };
 
+  // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
+  // beq that branches to the exit block.
+  // FIXME: Need to check that we're not trashing the CPSR when generating the
+  // cmp. We could also try to generate a cbz if the value in LR is also in
+  // another low register.
+  auto ExpandStart = [this](MachineInstr *MI) {
+    MachineBasicBlock *MBB = MI->getParent();
+    MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
+                                      TII->get(ARM::t2CMPri));
+    MIB.addReg(ARM::LR);
+    MIB.addImm(0);
+    MIB.addImm(ARMCC::AL);
+    MIB.addReg(ARM::CPSR);
+
+    MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2Bcc));
+    MIB.add(MI->getOperand(1));   // branch target
+    MIB.addImm(ARMCC::EQ);        // condition code
+    MIB.addReg(ARM::CPSR);
+  };
+
+  // TODO: We should be able to automatically remove these branches before we
+  // get here - probably by teaching analyzeBranch about the pseudo
+  // instructions.
+  // If there is an unconditional branch, after I, that just branches to the
+  // next block, remove it.
+  auto RemoveDeadBranch = [](MachineInstr *I) {
+    MachineBasicBlock *BB = I->getParent();
+    MachineInstr *Terminator = &BB->instr_back();
+    if (Terminator->isUnconditionalBranch() && I != Terminator) {
+      MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
+      if (BB->isLayoutSuccessor(Succ)) {
+        LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
+        Terminator->eraseFromParent();
+      }
+    }
+  };
+
   if (Revert) {
-    Start->eraseFromParent();
+    if (Start->getOpcode() == ARM::t2WhileLoopStart)
+      ExpandStart(Start);
     ExpandBranch(Dec, End);
+    Start->eraseFromParent();
   } else {
-    ExpandLoopStart(ML, Start);
-    ExpandLoopEnd(ML, Dec, End);
+    Start = ExpandLoopStart(ML, Start);
+    RemoveDeadBranch(Start);
+    End = ExpandLoopEnd(ML, Dec, End);
+    RemoveDeadBranch(End);
   }
 }
 

Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Mon Jul  1 01:21:28 2019
@@ -806,6 +806,7 @@ bool ARMTTIImpl::isHardwareLoopProfitabl
       default:
         break;
       case Intrinsic::set_loop_iterations:
+      case Intrinsic::test_set_loop_iterations:
       case Intrinsic::loop_decrement:
       case Intrinsic::loop_decrement_reg:
         return true;
@@ -841,6 +842,7 @@ bool ARMTTIImpl::isHardwareLoopProfitabl
   LLVMContext &C = L->getHeader()->getContext();
   HWLoopInfo.CounterInReg = true;
   HWLoopInfo.IsNestingLegal = false;
+  HWLoopInfo.PerformEntryTest = true;
   HWLoopInfo.CountType = Type::getInt32Ty(C);
   HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
   return true;

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,115 @@
+# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: $lr = tMOVr $r0, 13, $noreg
+# CHECK: $lr = t2DLS killed $r0
+# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main"
+  
+  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
+  entry:
+    %scevgep = getelementptr i32, i32* %q, i32 -1
+    %scevgep3 = getelementptr i32, i32* %p, i32 -1
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %while.body
+  
+  while.body:
+    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
+    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
+    %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
+    %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 1
+    %1 = load i32, i32* %scevgep2, align 4
+    store i32 %1, i32* %scevgep6, align 4
+    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
+    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
+    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %3 = icmp ne i32 %2, 0
+    br i1 %3, label %while.body, label %while.end
+  
+  while.end:
+    ret i32 0
+  }
+  
+  declare void @llvm.set.loop.iterations.i32(i32) #0
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
+  declare void @llvm.stackprotector(i8*, i8**) #1
+  
+  attributes #0 = { noduplicate nounwind }
+  attributes #1 = { nounwind }
+
+...
+---
+name:            do_copy
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $r0, $r1, $r2, $r7, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $lr = tMOVr $r0, 13, $noreg
+    t2DoLoopStart killed $r0
+    renamable $r0 = t2SUBri killed renamable $r1, 4, 14, $noreg, $noreg
+    renamable $r1 = t2SUBri killed renamable $r2, 4, 14, $noreg, $noreg
+  
+  bb.1.while.body:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r0, $r1
+  
+    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep2)
+    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep6)
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.1
+    t2B %bb.2, 14, $noreg
+  
+  bb.2.while.end:
+    $r0 = t2MOVi 0, 14, $noreg, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit killed $r0
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll Mon Jul  1 01:21:28 2019
@@ -0,0 +1,213 @@
+; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL
+
+; Not implemented as a mir test so that changes to the generic HardwareLoops
+; pass can also be tested. These functions have been taken from
+; Transforms/HardwareLoops/loop-guards.ll, in which the generation of a few
+; test.set intrinsics can be seen, but only one (ne_trip_count) gets generated
+; here. Simplifications result in icmps changing and maybe also the CFG. So,
+; TODO: Teach the HardwareLoops pass some better pattern recognition.
+
+; CHECK-GLOBAL-NOT: DoLoopStart
+; CHECK-GLOBAL-NOT: WhileLoopStart
+; CHECK-GLOBAL-NOT: LoopEnd
+
+; CHECK: ne_and_guard
+; CHECK: body:
+; CHECK: bb.0.entry:
+; CHECK:   t2CMPri renamable $lr, 0
+; CHECK:   tBcc %bb.3
+; CHECK: bb.1.while.body.preheader:
+; CHECK:   $lr = t2DLS renamable $lr
+; CHECK: bb.2.while.body:
+; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
+define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
+entry:
+  %brmerge.demorgan = and i1 %t1, %t2
+  %cmp6 = icmp ne i32 %N, 0
+  %or.cond = and i1 %brmerge.demorgan, %cmp6
+  br i1 %or.cond, label %while.body, label %if.end
+
+while.body:                                       ; preds = %while.body, %entry
+  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ]
+  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
+  %tmp = load i32, i32* %b.addr.07, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
+  store i32 %tmp, i32* %a.addr.08, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %if.end, label %while.body
+
+if.end:                                           ; preds = %while.body, %entry
+  ret void
+}
+
+; TODO: This could generate WLS
+; CHECK: ne_preheader
+; CHECK: body:
+; CHECK: bb.0.entry:
+; CHECK:   t2CMPri renamable $lr, 0
+; CHECK:   tBcc %bb.3
+; CHECK: bb.1.while.body.preheader:
+; CHECK:   $lr = t2DLS renamable $lr
+; CHECK: bb.2.while.body:
+; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
+define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
+entry:
+  %brmerge.demorgan = and i1 %t1, %t2
+  br i1 %brmerge.demorgan, label %while.preheader, label %if.end
+
+while.preheader:                                  ; preds = %entry
+  %cmp = icmp ne i32 %N, 0
+  br i1 %cmp, label %while.body, label %if.end
+
+while.body:                                       ; preds = %while.body, %while.preheader
+  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
+  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
+  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
+  %tmp = load i32, i32* %b.addr.07, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
+  store i32 %tmp, i32* %a.addr.08, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %if.end, label %while.body
+
+if.end:                                           ; preds = %while.body, %while.preheader, %entry
+  ret void
+}
+
+; TODO: This could generate WLS
+; CHECK: eq_preheader
+; CHECK: body:
+; CHECK: bb.0.entry:
+; CHECK:   t2CMPri renamable $lr, 0
+; CHECK:   tBcc %bb.3
+; CHECK: bb.1.while.body.preheader:
+; CHECK:   $lr = t2DLS renamable $lr
+; CHECK: bb.2.while.body:
+; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
+define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
+entry:
+  %brmerge.demorgan = and i1 %t1, %t2
+  br i1 %brmerge.demorgan, label %while.preheader, label %if.end
+
+while.preheader:                                  ; preds = %entry
+  %cmp = icmp eq i32 %N, 0
+  br i1 %cmp, label %if.end, label %while.body
+
+while.body:                                       ; preds = %while.body, %while.preheader
+  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
+  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
+  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
+  %tmp = load i32, i32* %b.addr.07, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
+  store i32 %tmp, i32* %a.addr.08, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %if.end, label %while.body
+
+if.end:                                           ; preds = %while.body, %while.preheader, %entry
+  ret void
+}
+
+; TODO: This could generate WLS
+; CHECK: ne_prepreheader
+; CHECK: body:
+; CHECK: bb.0.entry:
+; CHECK:   t2CMPri renamable $lr, 0
+; CHECK:   tBcc %bb.3
+; CHECK: bb.1.while.body.preheader:
+; CHECK:   $lr = t2DLS renamable $lr
+; CHECK: bb.2.while.body:
+; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
+define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
+entry:
+  %cmp = icmp ne i32 %N, 0
+  br i1 %cmp, label %while.preheader, label %if.end
+
+while.preheader:                                  ; preds = %entry
+  %brmerge.demorgan = and i1 %t1, %t2
+  br i1 %brmerge.demorgan, label %while.body, label %if.end
+
+while.body:                                       ; preds = %while.body, %while.preheader
+  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
+  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
+  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
+  %tmp = load i32, i32* %b.addr.07, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
+  store i32 %tmp, i32* %a.addr.08, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %if.end, label %while.body
+
+if.end:                                           ; preds = %while.body, %while.preheader, %entry
+  ret void
+}
+
+; CHECK: be_ne
+; CHECK: body:
+; CHECK: bb.0.entry:
+; CHECK:   $lr = t2DLS renamable $lr
+; CHECK: bb.1.do.body:
+; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.1
+define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
+entry:
+  %cmp = icmp ne i32 %N, 0
+  %sub = sub i32 %N, 1
+  %be = select i1 %cmp, i32 0, i32 %sub
+  %cmp.1 = icmp ne i32 %be, 0
+  br i1 %cmp.1, label %do.body, label %if.end
+
+do.body:                                          ; preds = %do.body, %entry
+  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ]
+  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ]
+  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
+  %tmp = load i32, i32* %b.addr.0, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
+  store i32 %tmp, i32* %a.addr.0, align 4
+  %inc = add nuw i32 %i.0, 1
+  %cmp.2 = icmp ult i32 %inc, %N
+  br i1 %cmp.2, label %do.body, label %if.end
+
+if.end:                                           ; preds = %do.body, %entry
+  ret void
+}
+
+; TODO: Remove the tMOVr in the preheader!
+; CHECK: ne_trip_count
+; CHECK: body:
+; CHECK: bb.0.entry:
+; CHECK:   $lr = t2WLS $r3, %bb.3
+; CHECK: bb.1.do.body.preheader:
+; CHECK:   $lr = tMOVr
+; CHECK: bb.2.do.body:
+; CHECK:   $lr = t2LEUpdate renamable $lr, %bb.2
+define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
+entry:
+  br label %do.body.preheader
+
+do.body.preheader:
+  %cmp = icmp ne i32 %N, 0
+  br i1 %cmp, label %do.body, label %if.end
+
+do.body:
+  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ]
+  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ]
+  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
+  %tmp = load i32, i32* %b.addr.0, align 4
+  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
+  store i32 %tmp, i32* %a.addr.0, align 4
+  %inc = add nuw i32 %i.0, 1
+  %cmp.1 = icmp ult i32 %inc, %N
+  br i1 %cmp.1, label %do.body, label %if.end
+
+if.end:                                           ; preds = %do.body, %do.body.preheader
+  ret void
+}

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/massive.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,145 @@
+# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: for.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  ; Function Attrs: norecurse nounwind
+  define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { ; each iteration multiplies the values loaded via %b and %c and stores the product via %a
+  entry:
+    %cmp8 = icmp eq i32 %N, 0 ; bypass the loop when %N is zero
+    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr i32, i32* %a, i32 -1 ; the three pointers start one element early; the body addresses them at +1
+    %scevgep4 = getelementptr i32, i32* %c, i32 -1
+    %scevgep8 = getelementptr i32, i32* %b, i32 -1
+    call void @llvm.set.loop.iterations.i32(i32 %N) ; %N is handed over as the iteration count
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+  
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+    %size = call i32 @llvm.arm.space(i32 4096, i32 undef) ; presumably pads the loop body by 4096 bytes to exceed the loop size limit -- TODO confirm
+    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
+    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
+    %mul = mul nsw i32 %2, %1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+    store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
+    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) ; updated loop counter, fed back through the %0 phi
+    %4 = icmp ne i32 %3, 0 ; loop again while the updated counter is nonzero
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  }
+  
+  declare i32 @llvm.arm.space(i32, i32) #1
+  declare void @llvm.set.loop.iterations.i32(i32) #2
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  attributes #1 = { nounwind }
+  attributes #2 = { noduplicate nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            massive
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $r7 = frame-setup tMOVr $sp, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_register $r7
+    tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr ; compare $r3 with zero
+    t2IT 0, 8, implicit-def $itstate
+    tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate ; conditional early return inside the IT block (presumably taken when $r3 == 0 -- TODO confirm)
+    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
+    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
+    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
+    $lr = tMOVr $r3, 14, $noreg ; the loop counter is kept in $lr, seeded from $r3
+    t2DoLoopStart killed $r3
+  
+  bb.1.for.body:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  
+    dead renamable $r3 = SPACE 4096, undef renamable $r0 ; 4096-byte SPACE pseudo; presumably what makes this loop too large to convert -- TODO confirm
+    renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
+    renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
+    renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
+    early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
+    renamable $lr = t2LoopDec killed renamable $lr, 1 ; step the counter in $lr by 1
+    t2LoopEnd renamable $lr, %bb.1 ; back-edge to bb.1
+    tB %bb.2, 14, $noreg
+  
+  bb.2.for.cond.cleanup:
+    tPOP_RET 14, $noreg, def $r7, def $pc
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/multiblock-massive.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: for.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr { ; multi-block loop: stores %c[i] * %b[i] via %a, then overwrites it with %c[i] / %b[i] when %b[i] != 0
+  entry:
+    %cmp8 = icmp eq i32 %N, 0 ; bypass the loop when %N is zero
+    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    br label %for.body ; NOTE(review): no loop-iteration intrinsic is called anywhere in this function -- confirm the test still exercises the pass
+  
+  for.cond.cleanup:                                 ; preds = %for.end, %entry
+    ret void
+  
+  for.body:                                         ; preds = %for.body.preheader, %for.end
+    %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ]
+    %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ]
+    %lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ]
+    %lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
+    %size = call i32 @llvm.arm.space(i32 3072, i32 undef) ; 3072 bytes here plus 1024 in middle.block; presumably sized so the whole loop is too large -- TODO confirm
+    %0 = load i32, i32* %lsr.iv4, align 4, !tbaa !3
+    %1 = load i32, i32* %lsr.iv2, align 4, !tbaa !3
+    %mul = mul nsw i32 %1, %0
+    store i32 %mul, i32* %lsr.iv1, align 4, !tbaa !3
+    %cmp = icmp ne i32 %0, 0 ; only divide when the divisor loaded via %b is nonzero
+    br i1 %cmp, label %middle.block, label %for.end
+  
+  middle.block:                                     ; preds = %for.body
+    %div = udiv i32 %1, %0
+    store i32 %div, i32* %lsr.iv1, align 4, !tbaa !3
+    %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef)
+    br label %for.end
+  
+  for.end:                                          ; preds = %middle.block, %for.body
+    %lsr.iv.next = add i32 %lsr.iv, -1 ; plain down-counter starting at %N
+    %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1
+    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
+    %exitcond = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond, label %for.cond.cleanup, label %for.body
+  }
+  
+  declare i32 @llvm.arm.space(i32, i32) #1
+  attributes #1 = { nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            size_limit
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: -8
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $r6, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r6, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+    tCBNZ $r3, %bb.3 ; enter the loop at bb.3 when $r3 is nonzero, otherwise fall through to the return block
+  
+  bb.1.for.cond.cleanup:
+    tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc
+  
+  bb.2.for.end:
+    successors: %bb.1(0x04000000), %bb.3(0x7c000000)
+  
+    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg
+    renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg
+    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg
+    renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 1, 14, $noreg ; count $r3 down by 1; $cpsr is live for the branch below
+    tBcc %bb.1, 0, killed $cpsr ; exit to bb.1 (cond code 0, presumably EQ -- TODO confirm); no explicit branch back to bb.3 follows
+  
+  bb.3.for.body:
+    successors: %bb.4(0x50000000), %bb.2(0x30000000)
+  
+    dead renamable $r12 = SPACE 3072, undef renamable $r0 ; 3072-byte SPACE pseudo
+    renamable $r12 = t2LDRi12 renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4, !tbaa !3)
+    renamable $lr = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2, !tbaa !3) ; $lr holds a loaded data value here, not a loop counter
+    t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr
+    renamable $r4 = nsw t2MUL renamable $lr, renamable $r12, 14, $noreg
+    tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
+    t2Bcc %bb.2, 0, killed $cpsr
+  
+  bb.4.middle.block:
+    successors: %bb.2(0x80000000)
+  
+    renamable $r4 = t2UDIV killed renamable $lr, killed renamable $r12, 14, $noreg
+    tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
+    dead renamable $r4 = SPACE 1024, undef renamable $r0 ; a further 1024-byte SPACE pseudo
+    t2B %bb.2, 14, $noreg
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
+
+# CHECK: while.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-arm-none-eabi"
+  
+  define i32 @skip_spill(i32 %n) #0 { ; sums the values returned by @bar over the loop; returns 0 when %n is zero
+  entry:
+    %cmp6 = icmp eq i32 %n, 0
+    br i1 %cmp6, label %while.end, label %while.body.preheader
+  
+  while.body.preheader:                             ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n) ; %n is handed over as the iteration count
+    br label %while.body
+  
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
+    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() ; function call inside the loop body
+    %add = add nsw i32 %call, %res.07
+    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) ; updated loop counter, fed back through the %0 phi
+    %2 = icmp ne i32 %1, 0 ; loop again while the updated counter is nonzero
+    br i1 %2, label %while.body, label %while.end
+  
+  while.end:                                        ; preds = %while.body, %entry
+    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
+    ret i32 %res.0.lcssa
+  }
+  
+  declare i32 @bar(...) local_unnamed_addr #0
+  declare void @llvm.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  
+  attributes #0 = { "target-features"="+mve.fp" }
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+
+...
+---
+name:            skip_spill
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.4(0x30000000), %bb.1(0x50000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    tCBZ $r0, %bb.4 ; zero count in $r0: go straight to the return-0 block
+  
+  bb.1.while.body.preheader:
+    successors: %bb.2(0x80000000)
+  
+    $lr = tMOVr $r0, 14, $noreg ; the loop counter is kept in $lr, seeded from $r0
+    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg ; running sum starts at 0 in $r4
+    t2DoLoopStart killed $r0
+  
+  bb.2.while.body:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  
+    $r5 = tMOVr killed $lr, 14, $noreg ; stash the loop counter in $r5 across the call
+    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 ; the call redefines $lr
+    $lr = tMOVr killed $r5, 14, $noreg ; bring the loop counter back into $lr
+    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.2 ; back-edge to bb.2
+    tB %bb.3, 14, $noreg
+  
+  bb.3.while.end:
+    $r0 = tMOVr killed $r4, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+  
+  bb.4:
+    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
+    $r0 = tMOVr killed $r4, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
+
+# CHECK: while.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-arm-none-eabi"
+  
+  define i32 @skip_spill(i32 %n) #0 { ; sums the values returned by @bar over the loop; NOTE(review): appears identical to the function in revert-after-call.mir -- confirm that is intended
+  entry:
+    %cmp6 = icmp eq i32 %n, 0
+    br i1 %cmp6, label %while.end, label %while.body.preheader
+  
+  while.body.preheader:                             ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n) ; %n is handed over as the iteration count
+    br label %while.body
+  
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
+    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() ; function call inside the loop body
+    %add = add nsw i32 %call, %res.07
+    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1) ; updated loop counter, fed back through the %0 phi
+    %2 = icmp ne i32 %1, 0 ; loop again while the updated counter is nonzero
+    br i1 %2, label %while.body, label %while.end
+  
+  while.end:                                        ; preds = %while.body, %entry
+    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
+    ret i32 %res.0.lcssa
+  }
+  
+  declare i32 @bar(...) local_unnamed_addr #0
+  declare void @llvm.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  
+  attributes #0 = { "target-features"="+mve.fp" }
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+
+...
+---
+name:            skip_spill
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.4(0x30000000), %bb.1(0x50000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    tCBZ $r0, %bb.4 ; zero count in $r0: go straight to the return-0 block
+  
+  bb.1.while.body.preheader:
+    successors: %bb.2(0x80000000)
+  
+    $lr = tMOVr $r0, 14, $noreg ; the loop counter is kept in $lr, seeded from $r0
+    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg ; running sum starts at 0 in $r4
+    t2DoLoopStart killed $r0
+  
+  bb.2.while.body:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  
+    $r5 = tMOVr killed $lr, 14, $noreg ; the loop counter is moved to $r5 (a register copy, not a stack spill) across the call
+    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 ; the call redefines $lr
+    $lr = tMOVr killed $r5, 14, $noreg ; bring the loop counter back into $lr
+    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.2 ; back-edge to bb.2
+    tB %bb.3, 14, $noreg
+  
+  bb.3.while.end:
+    $r0 = tMOVr killed $r4, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+  
+  bb.4:
+    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
+    $r0 = tMOVr killed $r4, 14, $noreg
+    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,130 @@
+# RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
+# CHECK:      body:
+# CHECK:      bb.0.entry:
+# CHECK:        t2CMPri $lr, 0, 14
+# CHECK-NEXT:   t2Bcc %bb.3, 0, $cpsr
+# CHECK-NEXT:   tB %bb.1
+# CHECK:      bb.1.do.body.preheader:
+# CHECK:        $lr = tMOVr killed $r3
+# CHECK:      bb.2.do.body:
+# CHECK:        $lr = t2SUBri killed renamable $lr, 1, 14
+# CHECK-NEXT:   t2CMPri $lr, 0, 14, $cpsr
+# CHECK-NEXT:   t2Bcc %bb.2, 1, $cpsr
+# CHECK-NEXT:   tB %bb.3, 14
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main"
+  
+  define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) #0 { ; copies i32 values from %b to %a; loop entry is guarded by the test.set.loop.iterations result
+  entry:
+    %cmp = icmp ne i32 %N, 0 ; NOTE(review): %cmp is not used anywhere in this function
+    %0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) ; the i1 result decides whether the loop is entered
+    br i1 %0, label %do.body.preheader, label %if.end
+  
+  do.body.preheader:                                ; preds = %entry
+    br label %do.body
+  
+  do.body:                                          ; preds = %do.body.preheader, %do.body
+    %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ]
+    %1 = phi i32 [ %N, %do.body.preheader ], [ %2, %do.body ]
+    %scevgep = getelementptr i32, i32* %b, i32 %i.0
+    %scevgep1 = getelementptr i32, i32* %a, i32 %i.0
+    %size = call i32 @llvm.arm.space(i32 4096, i32 undef) ; presumably pads the loop body by 4096 bytes so the loop must be reverted -- TODO confirm
+    %tmp = load i32, i32* %scevgep, align 4
+    store i32 %tmp, i32* %scevgep1, align 4
+    %inc = add nuw i32 %i.0, 1
+    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1) ; updated loop counter, fed back through the %1 phi
+    %3 = icmp ne i32 %2, 0 ; loop again while the updated counter is nonzero
+    br i1 %3, label %do.body, label %if.end
+  
+  if.end:                                           ; preds = %do.body, %entry
+    ret void
+  }
+  
+  declare i32 @llvm.arm.space(i32, i32) #1
+  declare i1 @llvm.test.set.loop.iterations.i32(i32) #2
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  attributes #0 = { "target-features"="+lob" }
+  attributes #1 = { nounwind "target-features"="+lob" }
+  attributes #2 = { noduplicate nounwind }
+  attributes #3 = { nounwind }
+
+...
+---
+name:            ne_trip_count
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    t2WhileLoopStart $r3, %bb.3 ; loop-entry guard pseudo reading $r3, with bb.3 (the exit block) as its target; NOTE(review): the expected output at the top of this test compares $lr, which is only set in bb.1 -- confirm the reverted compare uses the intended register
+    tB %bb.1, 14, $noreg
+  
+  bb.1.do.body.preheader:
+    successors: %bb.2(0x80000000)
+  
+    $lr = tMOVr killed $r3, 14, $noreg ; the loop counter is kept in $lr, seeded from $r3
+    renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg ; element index starts at 0 in $r0
+  
+  bb.2.do.body:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  
+    dead renamable $r3 = SPACE 4096, undef renamable $r0 ; 4096-byte SPACE pseudo; presumably what forces the loop to be reverted -- TODO confirm
+    renamable $r3 = t2LDRs renamable $r2, renamable $r0, 2, 14, $noreg :: (load 4 from %ir.scevgep)
+    t2STRs killed renamable $r3, renamable $r1, renamable $r0, 2, 14, $noreg :: (store 4 into %ir.scevgep1)
+    renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1 ; step the counter in $lr by 1
+    t2LoopEnd renamable $lr, %bb.2 ; back-edge to bb.2
+    tB %bb.3, 14, $noreg
+  
+  bb.3.if.end:
+    tPOP_RET 14, $noreg, def $r7, def $pc
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/size-limit.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,155 @@
+# RUN: llc -mtriple=armv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: entry:
+# CHECK: $lr = t2DLS
+# CHECK: for.body:
+# CHECK: $lr = t2LEUpdate renamable $lr
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  ; Function Attrs: norecurse nounwind
+  define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+  entry:
+    %cmp8 = icmp eq i32 %N, 0
+    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr i32, i32* %a, i32 -1
+    %scevgep4 = getelementptr i32, i32* %c, i32 -1
+    %scevgep8 = getelementptr i32, i32* %b, i32 -1
+    call void @llvm.set.loop.iterations.i32(i32 %N)
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+  
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+    %size = call i32 @llvm.arm.space(i32 4072, i32 undef)
+    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
+    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
+    %mul = mul nsw i32 %2, %1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+    store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
+    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  }
+  
+  ; Function Attrs: nounwind
+  declare i32 @llvm.arm.space(i32, i32) #1
+  
+  ; Function Attrs: noduplicate nounwind
+  declare void @llvm.set.loop.iterations.i32(i32) #2
+  
+  ; Function Attrs: noduplicate nounwind
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+  
+  attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
+  attributes #1 = { nounwind }
+  attributes #2 = { noduplicate nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            size_limit
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $r7 = frame-setup tMOVr $sp, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_register $r7
+    tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
+    t2IT 0, 8, implicit-def $itstate
+    tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
+    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
+    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
+    $lr = tMOVr $r3, 14, $noreg
+    t2DoLoopStart killed $r3
+  
+  bb.1.for.body:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  
+    dead renamable $r3 = SPACE 4072, undef renamable $r0
+    renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
+    renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
+    renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
+    early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.1
+    tB %bb.2, 14, $noreg
+  
+  bb.2.for.cond.cleanup:
+    tPOP_RET 14, $noreg, def $r7, def $pc
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/switch.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,198 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops -o - | FileCheck %s
+# CHECK:      bb.1.for.body.preheader:
+# CHECK:        $lr = t2DLS
+# CHECK-NOT:    t2LoopDec
+# CHECK:      bb.6.for.inc:
+# CHECK:        $lr = t2LEUpdate renamable $lr, %bb.2
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  ; Function Attrs: norecurse nounwind readonly
+  define dso_local arm_aapcscc i32 @search(i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+  entry:
+    %cmp11 = icmp eq i32 %N, 0
+    br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:
+    call void @llvm.set.loop.iterations.i32(i32 %N)
+    br label %for.body
+  
+  for.cond.cleanup:
+    %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
+    %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
+    %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
+    ret i32 %sub
+  
+  for.body:
+    %lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ]
+    %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ]
+    %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ]
+    %1 = load i8, i8* %lsr.iv1, align 1
+    %2 = zext i8 %1 to i32
+    switch i32 %2, label %for.inc [
+      i32 108, label %sw.bb
+      i32 111, label %sw.bb
+      i32 112, label %sw.bb
+      i32 32, label %sw.bb1
+    ]
+  
+  sw.bb:
+    %inc = add nsw i32 %found.012, 1
+    br label %for.inc
+  
+  sw.bb1:
+    %inc2 = add nsw i32 %spaces.013, 1
+    br label %for.inc
+  
+  for.inc:
+    %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
+    %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
+    %scevgep = getelementptr i8, i8* %lsr.iv1, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  }
+  
+  declare void @llvm.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  declare void @llvm.stackprotector(i8*, i8**) #2
+  
+  attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+
+...
+---
+name:            search
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: -8
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+    liveins: $r0, $r1, $r4, $r6, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r6, $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r6, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    $r7 = frame-setup t2ADDri $sp, 8, 14, $noreg, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+    t2CMPri $r1, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 0, killed $cpsr
+  
+  bb.3.for.body.preheader:
+    successors: %bb.4(0x80000000)
+    liveins: $r0, $r1
+  
+    $lr = tMOVr $r1, 14, $noreg
+    t2DoLoopStart killed $r1
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    renamable $r12 = t2MOVi 1, 14, $noreg, $noreg
+    renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
+  
+  bb.4.for.body:
+    successors: %bb.5(0x26666665), %bb.6(0x5999999b)
+    liveins: $lr, $r0, $r1, $r2, $r12
+  
+    renamable $r3 = t2LDRBi12 renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1)
+    renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2CMPri renamable $r4, 4, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 8, killed $cpsr
+  
+  bb.6.for.body:
+    successors: %bb.7(0x6db6db6e), %bb.5(0x12492492)
+    liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r12
+  
+    renamable $r4 = t2LSLrr renamable $r12, killed renamable $r4, 14, $noreg, $noreg
+    t2TSTri killed renamable $r4, 25, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 0, killed $cpsr
+  
+  bb.7.sw.bb:
+    successors: %bb.8(0x80000000)
+    liveins: $lr, $r0, $r1, $r2, $r12
+  
+    renamable $r2 = nsw t2ADDri killed renamable $r2, 1, 14, $noreg, $noreg
+    t2B %bb.8, 14, $noreg
+  
+  bb.5.for.body:
+    successors: %bb.8(0x80000000)
+    liveins: $lr, $r0, $r1, $r2, $r3, $r12
+  
+    t2CMPri killed renamable $r3, 32, 14, $noreg, implicit-def $cpsr
+    BUNDLE implicit-def dead $itstate, implicit-def $r1, implicit killed $r1, implicit killed $cpsr {
+      t2IT 0, 8, implicit-def $itstate
+      renamable $r1 = nsw t2ADDri killed renamable $r1, 1, 0, killed $cpsr, $noreg, implicit $r1, implicit internal killed $itstate
+    }
+  
+  bb.8.for.inc:
+    successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r0, $r1, $r2, $r12
+  
+    renamable $r0 = t2ADDri killed renamable $r0, 1, 14, $noreg, $noreg
+    t2LoopEnd renamable $lr, %bb.4
+    t2B %bb.2, 14, $noreg
+  
+  bb.2.for.cond.cleanup:
+    liveins: $r1, $r2
+  
+    renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
+  
+  bb.1:
+    renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
+
+...

Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir?rev=364733&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir (added)
+++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir Mon Jul  1 01:21:28 2019
@@ -0,0 +1,131 @@
+# RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
+
+# TODO: Remove the lr = tMOVr
+# CHECK: body:
+# CHECK:   $lr = t2WLS $r2, [[EXIT:%bb[.0-9]+]]
+# CHECK: [[PREHEADER:bb[.0-9a-z]+]]:
+# CHECK:   $lr = tMOVr killed $r2
+# CHECK: [[BODY:bb[.0-9a-z]+]]:
+# CHECK:   $lr = t2LEUpdate renamable $lr
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-arm-unknown"
+  
+  ; Function Attrs: norecurse nounwind optsize
+  define dso_local arm_aapcscc void @copy(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) {
+  entry:
+    %cmp4 = icmp eq i32 %N, 0
+    %0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
+    br i1 %0, label %while.body.preheader, label %while.end
+  
+  while.body.preheader:                             ; preds = %entry
+    br label %while.body
+  
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %while.body.preheader ]
+    %b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %while.body.preheader ]
+    %1 = phi i32 [ %N, %while.body.preheader ], [ %3, %while.body ]
+    %incdec.ptr = getelementptr inbounds i16, i16* %b.addr.05, i32 1
+    %2 = load i16, i16* %b.addr.05, align 2, !tbaa !3
+    %incdec.ptr1 = getelementptr inbounds i16, i16* %a.addr.06, i32 1
+    store i16 %2, i16* %a.addr.06, align 2, !tbaa !3
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %while.body, label %while.end
+  
+  while.end:                                        ; preds = %while.body, %entry
+    ret void
+  }
+  
+  declare i1 @llvm.test.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"short", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            copy
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  
+    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $r7 = frame-setup tMOVr $sp, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_register $r7
+    t2WhileLoopStart $r2, %bb.3
+    tB %bb.1, 14, $noreg
+  
+  bb.1.while.body.preheader:
+    successors: %bb.2(0x80000000)
+  
+    $lr = tMOVr killed $r2, 14, $noreg
+  
+  bb.2.while.body:
+    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  
+    renamable $r2, renamable $r1 = t2LDRH_POST killed renamable $r1, 2, 14, $noreg :: (load 2 from %ir.b.addr.05, !tbaa !3)
+    early-clobber renamable $r0 = t2STRH_POST killed renamable $r2, killed renamable $r0, 2, 14, $noreg :: (store 2 into %ir.a.addr.06, !tbaa !3)
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.2
+    tB %bb.3, 14, $noreg
+  
+  bb.3.while.end:
+    tPOP_RET 14, $noreg, def $r7, def $pc
+
+...

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir (removed)
@@ -1,115 +0,0 @@
-# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
-# CHECK: $lr = tMOVr $r0, 13, $noreg
-# CHECK: $lr = t2DLS killed $r0
-# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main"
-  
-  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
-  entry:
-    %scevgep = getelementptr i32, i32* %q, i32 -1
-    %scevgep3 = getelementptr i32, i32* %p, i32 -1
-    call void @llvm.set.loop.iterations.i32(i32 %n)
-    br label %while.body
-  
-  while.body:
-    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
-    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
-    %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
-    %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1
-    %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 1
-    %1 = load i32, i32* %scevgep2, align 4
-    store i32 %1, i32* %scevgep6, align 4
-    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
-    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
-    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
-    %3 = icmp ne i32 %2, 0
-    br i1 %3, label %while.body, label %while.end
-  
-  while.end:
-    ret i32 0
-  }
-  
-  declare void @llvm.set.loop.iterations.i32(i32) #0
-  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
-  declare void @llvm.stackprotector(i8*, i8**) #1
-  
-  attributes #0 = { noduplicate nounwind }
-  attributes #1 = { nounwind }
-
-...
----
-name:            do_copy
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: true
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-  - { reg: '$r1', virtual-reg: '' }
-  - { reg: '$r2', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       8
-  offsetAdjustment: 0
-  maxAlignment:    4
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.1(0x80000000)
-    liveins: $r0, $r1, $r2, $r7, $lr
-  
-    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr
-    frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    $lr = tMOVr $r0, 13, $noreg
-    t2DoLoopStart killed $r0
-    renamable $r0 = t2SUBri killed renamable $r1, 4, 14, $noreg, $noreg
-    renamable $r1 = t2SUBri killed renamable $r2, 4, 14, $noreg, $noreg
-  
-  bb.1.while.body:
-    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-    liveins: $lr, $r0, $r1
-  
-    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep2)
-    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep6)
-    renamable $lr = t2LoopDec killed renamable $lr, 1
-    t2LoopEnd renamable $lr, %bb.1
-    t2B %bb.2, 14, $noreg
-  
-  bb.2.while.end:
-    $r0 = t2MOVi 0, 14, $noreg, $noreg
-    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit killed $r0
-
-...

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/do-rem.ll Mon Jul  1 01:21:28 2019
@@ -3,10 +3,14 @@
 @g = common local_unnamed_addr global i32* null, align 4
 
 ; CHECK-LABEL: do_with_i32_urem
+; CHECK: entry:
+; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
+; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end
+
 ; CHECK: while.body.preheader:
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n)
 ; CHECK-NEXT: br label %while.body
 
+; CHECK: while.body:
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
@@ -38,10 +42,14 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i32_srem
+; CHECK: entry:
+; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
+; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end
+
 ; CHECK: while.body.preheader:
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n)
 ; CHECK-NEXT: br label %while.body
 
+; CHECK: while.body:
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
@@ -73,10 +81,14 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i32_udiv
+; CHECK: entry:
+; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
+; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end
+
 ; CHECK: while.body.preheader:
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n)
 ; CHECK-NEXT: br label %while.body
 
+; CHECK: while.body:
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
@@ -108,10 +120,14 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i32_sdiv
+; CHECK: entry:
+; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
+; CHECK: br i1 [[TEST]], label %while.body.preheader, label %while.end
+
 ; CHECK: while.body.preheader:
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n)
 ; CHECK-NEXT: br label %while.body
 
+; CHECK: while.body:
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.preheader ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
@@ -143,7 +159,7 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i64_urem
-; CHECK-NOT: llvm.set.loop.iterations
+; CHECK-NOT: llvm.{{.*}}.loop.iterations
 ; CHECK-NOT: llvm.loop.decrement
 define i64 @do_with_i64_urem(i32 %n) {
 entry:
@@ -172,7 +188,7 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i64_srem
-; CHECK-NOT: llvm.set.loop.iterations
+; CHECK-NOT: llvm.{{.*}}.loop.iterations
 ; CHECK-NOT: llvm.loop.decrement
 define i64 @do_with_i64_srem(i32 %n) {
 entry:
@@ -201,7 +217,7 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i64_udiv
-; CHECK-NOT: llvm.set.loop.iterations
+; CHECK-NOT: llvm.{{.*}}.loop.iterations
 ; CHECK-NOT: llvm.loop.decrement
 define i64 @do_with_i64_udiv(i32 %n) {
 entry:
@@ -230,7 +246,7 @@ while.end:
 }
 
 ; CHECK-LABEL: do_with_i64_sdiv
-; CHECK-NOT: call void @llvm.set.loop.iterations
+; CHECK-NOT: call {{.*}} @llvm.{{.*}}.loop.iterations
 ; CHECK-NOT: call i32 @llvm.loop.decrement
 define i64 @do_with_i64_sdiv(i32 %n) {
 entry:

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/fp-emulation.ll Mon Jul  1 01:21:28 2019
@@ -2,9 +2,13 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+soft-float -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
 
 ; CHECK-LABEL: test_fptosi
-; CHECK: while.body.lr.ph:
+; CHECK-SOFT-NOT: call void @llvm.set.loop.iterations
+
+; CHECK: entry:
 ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1
 ; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1
+
+; CHECK: while.body.lr.ph:
 ; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
 ; CHECK-FP-NEXT: br label %while.body
 
@@ -13,8 +17,6 @@
 ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK-FP: br i1 [[CMP]], label %while.body, label %cleanup.loopexit
 
-; CHECK-SOFT-NOT: call void @llvm.set.loop.iterations
-
 define void @test_fptosi(i32 %n, i32** %g, double** %d) {
 entry:
   %n.off = add i32 %n, -1
@@ -53,9 +55,10 @@ cleanup:
 }
 
 ; CHECK-LABEL: test_fptoui
-; CHECK-FP: while.body.lr.ph:
+; CHECK: entry:
 ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1
 ; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1
+; CHECK-FP: while.body.lr.ph:
 ; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
 ; CHECK-FP-NEXT: br label %while.body
 
@@ -104,10 +107,11 @@ cleanup:
 }
 
 ; CHECK-LABEL: load_store_float
+; CHECK: entry:
+; CHECK:   [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1
+; CHECK:   [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1
 ; CHECK: while.body.lr.ph:
-; CHECK: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1
-; CHECK: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
+; CHECK:   call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
 ; CHECK-NEXT: br label %while.body
 
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %if.end4 ]
@@ -152,12 +156,11 @@ cleanup:
 }
 
 ; CHECK-LABEL: fp_add
-; CHECK: while.body.lr.ph:
-
 ; CHECK-SOFT-NOT: call void @llvm.set.loop.iterations
-
+; CHECK: entry:
 ; CHECK-FP: [[CMP:%[^ ]+]] = icmp ugt i32 %n, 1
 ; CHECK-FP: [[COUNT:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 1
+; CHECK: while.body.lr.ph:
 ; CHECK-FP: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
 ; CHECK: br label %while.body
 

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir (removed)
@@ -1,145 +0,0 @@
-# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
-# CHECK: for.body:
-# CHECK-NOT: t2DLS
-# CHECK-NOT: t2LEUpdate
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main-unknown-unknown"
-  
-  ; Function Attrs: norecurse nounwind
-  define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
-  entry:
-    %cmp8 = icmp eq i32 %N, 0
-    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
-  
-  for.body.preheader:                               ; preds = %entry
-    %scevgep = getelementptr i32, i32* %a, i32 -1
-    %scevgep4 = getelementptr i32, i32* %c, i32 -1
-    %scevgep8 = getelementptr i32, i32* %b, i32 -1
-    call void @llvm.set.loop.iterations.i32(i32 %N)
-    br label %for.body
-  
-  for.cond.cleanup:                                 ; preds = %for.body, %entry
-    ret void
-  
-  for.body:                                         ; preds = %for.body, %for.body.preheader
-    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
-    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
-    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
-    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
-    %size = call i32 @llvm.arm.space(i32 4096, i32 undef)
-    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
-    %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
-    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
-    %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
-    %mul = mul nsw i32 %2, %1
-    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
-    store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
-    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
-    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
-    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
-    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
-    %4 = icmp ne i32 %3, 0
-    br i1 %4, label %for.body, label %for.cond.cleanup
-  }
-  
-  declare i32 @llvm.arm.space(i32, i32) #1
-  declare void @llvm.set.loop.iterations.i32(i32) #2
-  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
-  
-  attributes #1 = { nounwind }
-  attributes #2 = { noduplicate nounwind }
-  
-  !llvm.module.flags = !{!0, !1}
-  !llvm.ident = !{!2}
-  
-  !0 = !{i32 1, !"wchar_size", i32 4}
-  !1 = !{i32 1, !"min_enum_size", i32 4}
-  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
-  !3 = !{!4, !4, i64 0}
-  !4 = !{!"int", !5, i64 0}
-  !5 = !{!"omnipotent char", !6, i64 0}
-  !6 = !{!"Simple C/C++ TBAA"}
-
-...
----
-name:            massive
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: false
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-  - { reg: '$r1', virtual-reg: '' }
-  - { reg: '$r2', virtual-reg: '' }
-  - { reg: '$r3', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       8
-  offsetAdjustment: 0
-  maxAlignment:    4
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.1(0x80000000)
-  
-    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
-    frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    $r7 = frame-setup tMOVr $sp, 14, $noreg
-    frame-setup CFI_INSTRUCTION def_cfa_register $r7
-    tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
-    t2IT 0, 8, implicit-def $itstate
-    tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
-    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
-    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
-    $lr = tMOVr $r3, 14, $noreg
-    t2DoLoopStart killed $r3
-  
-  bb.1.for.body:
-    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  
-    dead renamable $r3 = SPACE 4096, undef renamable $r0
-    renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
-    renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
-    renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
-    early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
-    renamable $lr = t2LoopDec killed renamable $lr, 1
-    t2LoopEnd renamable $lr, %bb.1
-    tB %bb.2, 14, $noreg
-  
-  bb.2.for.cond.cleanup:
-    tPOP_RET 14, $noreg, def $r7, def $pc
-
-...

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir (removed)
@@ -1,160 +0,0 @@
-# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
-# CHECK: for.body:
-# CHECK-NOT: t2DLS
-# CHECK-NOT: t2LEUpdate
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main-unknown-unknown"
-  
-  define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
-  entry:
-    %cmp8 = icmp eq i32 %N, 0
-    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
-  
-  for.body.preheader:                               ; preds = %entry
-    br label %for.body
-  
-  for.cond.cleanup:                                 ; preds = %for.end, %entry
-    ret void
-  
-  for.body:                                         ; preds = %for.body.preheader, %for.end
-    %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ]
-    %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ]
-    %lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ]
-    %lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
-    %size = call i32 @llvm.arm.space(i32 3072, i32 undef)
-    %0 = load i32, i32* %lsr.iv4, align 4, !tbaa !3
-    %1 = load i32, i32* %lsr.iv2, align 4, !tbaa !3
-    %mul = mul nsw i32 %1, %0
-    store i32 %mul, i32* %lsr.iv1, align 4, !tbaa !3
-    %cmp = icmp ne i32 %0, 0
-    br i1 %cmp, label %middle.block, label %for.end
-  
-  middle.block:                                     ; preds = %for.body
-    %div = udiv i32 %1, %0
-    store i32 %div, i32* %lsr.iv1, align 4, !tbaa !3
-    %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef)
-    br label %for.end
-  
-  for.end:                                          ; preds = %middle.block, %for.body
-    %lsr.iv.next = add i32 %lsr.iv, -1
-    %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
-    %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1
-    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
-    %exitcond = icmp eq i32 %lsr.iv.next, 0
-    br i1 %exitcond, label %for.cond.cleanup, label %for.body
-  }
-  
-  declare i32 @llvm.arm.space(i32, i32) #1
-  attributes #1 = { nounwind }
-  
-  !llvm.module.flags = !{!0, !1}
-  !llvm.ident = !{!2}
-  
-  !0 = !{i32 1, !"wchar_size", i32 4}
-  !1 = !{i32 1, !"min_enum_size", i32 4}
-  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
-  !3 = !{!4, !4, i64 0}
-  !4 = !{!"int", !5, i64 0}
-  !5 = !{!"omnipotent char", !6, i64 0}
-  !6 = !{!"Simple C/C++ TBAA"}
-
-...
----
-name:            size_limit
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: false
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-  - { reg: '$r1', virtual-reg: '' }
-  - { reg: '$r2', virtual-reg: '' }
-  - { reg: '$r3', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       16
-  offsetAdjustment: -8
-  maxAlignment:    4
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.1(0x30000000), %bb.3(0x50000000)
-  
-    frame-setup tPUSH 14, $noreg, killed $r4, killed $r6, $r7, killed $lr, implicit-def $sp, implicit $sp
-    frame-setup CFI_INSTRUCTION def_cfa_offset 16
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    frame-setup CFI_INSTRUCTION offset $r6, -12
-    frame-setup CFI_INSTRUCTION offset $r4, -16
-    $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg
-    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
-    tCBNZ $r3, %bb.3
-  
-  bb.1.for.cond.cleanup:
-    tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc
-  
-  bb.2.for.end:
-    successors: %bb.1(0x04000000), %bb.3(0x7c000000)
-  
-    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg
-    renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg
-    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg
-    renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 1, 14, $noreg
-    tBcc %bb.1, 0, killed $cpsr
-  
-  bb.3.for.body:
-    successors: %bb.4(0x50000000), %bb.2(0x30000000)
-  
-    dead renamable $r12 = SPACE 3072, undef renamable $r0
-    renamable $r12 = t2LDRi12 renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4, !tbaa !3)
-    renamable $lr = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2, !tbaa !3)
-    t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr
-    renamable $r4 = nsw t2MUL renamable $lr, renamable $r12, 14, $noreg
-    tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
-    t2Bcc %bb.2, 0, killed $cpsr
-  
-  bb.4.middle.block:
-    successors: %bb.2(0x80000000)
-  
-    renamable $r4 = t2UDIV killed renamable $lr, killed renamable $r12, 14, $noreg
-    tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
-    dead renamable $r4 = SPACE 1024, undef renamable $r0
-    t2B %bb.2, 14, $noreg
-
-...

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir (removed)
@@ -1,130 +0,0 @@
-# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
-
-# CHECK: while.body:
-# CHECK-NOT: t2DLS
-# CHECK-NOT: t2LEUpdate
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main-arm-none-eabi"
-  
-  define i32 @skip_spill(i32 %n) #0 {
-  entry:
-    %cmp6 = icmp eq i32 %n, 0
-    br i1 %cmp6, label %while.end, label %while.body.preheader
-  
-  while.body.preheader:                             ; preds = %entry
-    call void @llvm.set.loop.iterations.i32(i32 %n)
-    br label %while.body
-  
-  while.body:                                       ; preds = %while.body, %while.body.preheader
-    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
-    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
-    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
-    %add = add nsw i32 %call, %res.07
-    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
-    %2 = icmp ne i32 %1, 0
-    br i1 %2, label %while.body, label %while.end
-  
-  while.end:                                        ; preds = %while.body, %entry
-    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
-    ret i32 %res.0.lcssa
-  }
-  
-  declare i32 @bar(...) local_unnamed_addr #0
-  declare void @llvm.set.loop.iterations.i32(i32) #1
-  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
-  
-  attributes #0 = { "target-features"="+mve.fp" }
-  attributes #1 = { noduplicate nounwind }
-  attributes #2 = { nounwind }
-
-...
----
-name:            skip_spill
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: false
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       16
-  offsetAdjustment: 0
-  maxAlignment:    4
-  adjustsStack:    true
-  hasCalls:        true
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.4(0x30000000), %bb.1(0x50000000)
-  
-    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
-    frame-setup CFI_INSTRUCTION def_cfa_offset 16
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    frame-setup CFI_INSTRUCTION offset $r5, -12
-    frame-setup CFI_INSTRUCTION offset $r4, -16
-    tCBZ $r0, %bb.4
-  
-  bb.1.while.body.preheader:
-    successors: %bb.2(0x80000000)
-  
-    $lr = tMOVr $r0, 14, $noreg
-    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
-    t2DoLoopStart killed $r0
-  
-  bb.2.while.body:
-    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  
-    $r5 = tMOVr killed $lr, 14, $noreg
-    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
-    $lr = tMOVr killed $r5, 14, $noreg
-    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
-    renamable $lr = t2LoopDec killed renamable $lr, 1
-    t2LoopEnd renamable $lr, %bb.2
-    tB %bb.3, 14, $noreg
-  
-  bb.3.while.end:
-    $r0 = tMOVr killed $r4, 14, $noreg
-    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
-  
-  bb.4:
-    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
-    $r0 = tMOVr killed $r4, 14, $noreg
-    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
-
-...

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir (removed)
@@ -1,130 +0,0 @@
-# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s
-
-# CHECK: while.body:
-# CHECK-NOT: t2DLS
-# CHECK-NOT: t2LEUpdate
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main-arm-none-eabi"
-  
-  define i32 @skip_spill(i32 %n) #0 {
-  entry:
-    %cmp6 = icmp eq i32 %n, 0
-    br i1 %cmp6, label %while.end, label %while.body.preheader
-  
-  while.body.preheader:                             ; preds = %entry
-    call void @llvm.set.loop.iterations.i32(i32 %n)
-    br label %while.body
-  
-  while.body:                                       ; preds = %while.body, %while.body.preheader
-    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
-    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
-    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
-    %add = add nsw i32 %call, %res.07
-    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
-    %2 = icmp ne i32 %1, 0
-    br i1 %2, label %while.body, label %while.end
-  
-  while.end:                                        ; preds = %while.body, %entry
-    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
-    ret i32 %res.0.lcssa
-  }
-  
-  declare i32 @bar(...) local_unnamed_addr #0
-  declare void @llvm.set.loop.iterations.i32(i32) #1
-  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
-  
-  attributes #0 = { "target-features"="+mve.fp" }
-  attributes #1 = { noduplicate nounwind }
-  attributes #2 = { nounwind }
-
-...
----
-name:            skip_spill
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: false
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       16
-  offsetAdjustment: 0
-  maxAlignment:    4
-  adjustsStack:    true
-  hasCalls:        true
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.4(0x30000000), %bb.1(0x50000000)
-  
-    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp
-    frame-setup CFI_INSTRUCTION def_cfa_offset 16
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    frame-setup CFI_INSTRUCTION offset $r5, -12
-    frame-setup CFI_INSTRUCTION offset $r4, -16
-    tCBZ $r0, %bb.4
-  
-  bb.1.while.body.preheader:
-    successors: %bb.2(0x80000000)
-  
-    $lr = tMOVr $r0, 14, $noreg
-    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
-    t2DoLoopStart killed $r0
-  
-  bb.2.while.body:
-    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  
-    $r5 = tMOVr killed $lr, 14, $noreg
-    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
-    $lr = tMOVr killed $r5, 14, $noreg
-    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg
-    renamable $lr = t2LoopDec killed renamable $lr, 1
-    t2LoopEnd renamable $lr, %bb.2
-    tB %bb.3, 14, $noreg
-  
-  bb.3.while.end:
-    $r0 = tMOVr killed $r4, 14, $noreg
-    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
-  
-  bb.4:
-    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
-    $r0 = tMOVr killed $r4, 14, $noreg
-    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
-
-...

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll Mon Jul  1 01:21:28 2019
@@ -3,7 +3,7 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=-lob -hardware-loops %s -S -o - | FileCheck %s --check-prefix=DISABLED
 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
 
-; DISABLED-NOT: llvm.set.loop.iterations
+; DISABLED-NOT: llvm.{{.*}}.loop.iterations
 ; DISABLED-NOT: llvm.loop.decrement
 
 @g = common local_unnamed_addr global i32* null, align 4
@@ -46,9 +46,12 @@ while.end:
 }
 
 ; CHECK-LABEL: do_inc1
+; CHECK: entry:
+; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
+; CHECK: br i1 [[TEST]], label %while.body.lr.ph, label %while.end
+
 ; CHECK: while.body.lr.ph:
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %n)
-; CHECK-NEXT: br label %while.body
+; CHECK: br label %while.body
 
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ %n, %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
@@ -56,12 +59,12 @@ while.end:
 ; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
 
 ; CHECK-LLC-LABEL:do_inc1:
-; CHECK-LLC:        dls lr,
+; CHECK-LLC:        wls lr, {{.*}}, [[LOOP_EXIT:\.LBB[0-9_]+]]
 ; CHECK-LLC-NOT:    mov lr,
 ; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
 ; CHECK-LLC:        le lr, [[LOOP_HEADER]]
 ; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9_]+]]
-; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9_]+]]:
+; CHECK-LLC:      [[LOOP_EXIT]]:
 
 define i32 @do_inc1(i32 %n) {
 entry:
@@ -91,26 +94,26 @@ while.end:
 }
 
 ; CHECK-LABEL: do_inc2
-; CHECK: while.body.lr.ph:
+; CHECK: entry:
 ; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, -1
 ; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[ROUND]], 1
 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
-; CHECK-NEXT: br label %while.body
 
-; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
-; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
-; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
-; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
+; CHECK: while.body.lr.ph:
+; CHECK:   call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
+; CHECK:   br label %while.body
+; CHECK: while.body:
+; CHECK:   [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
+; CHECK:   [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
+; CHECK:   [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
+; CHECK:   br i1 [[CMP]], label %while.body, label %while.end.loopexit
 
 ; CHECK-LLC:      do_inc2:
 ; CHECK-LLC-NOT:    mov lr,
-; CHECK-LLC:        dls lr,
+; CHECK-LLC:        dls lr, {{.*}}
 ; CHECK-LLC-NOT:    mov lr,
 ; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9._]+]]:
 ; CHECK-LLC:        le lr, [[LOOP_HEADER]]
-; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9._]+]]
-; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9_]+]]:
 
 define i32 @do_inc2(i32 %n) {
 entry:
@@ -141,15 +144,17 @@ while.end:
 
 ; CHECK-LABEL: do_dec2
 
-; CHECK: while.body.lr.ph:
+; CHECK: entry:
 ; CHECK: [[ROUND:%[^ ]+]] = add i32 %n, 1
 ; CHECK: [[CMP:%[^ ]+]] = icmp slt i32 %n, 2
 ; CHECK: [[SMIN:%[^ ]+]] = select i1 [[CMP]], i32 %n, i32 2
 ; CHECK: [[SUB:%[^ ]+]] = sub i32 [[ROUND]], [[SMIN]]
 ; CHECK: [[HALVE:%[^ ]+]] = lshr i32 [[SUB]], 1
 ; CHECK: [[COUNT:%[^ ]+]] = add nuw i32 [[HALVE]], 1
+
+; CHECK: while.body.lr.ph:
 ; CHECK: call void @llvm.set.loop.iterations.i32(i32 [[COUNT]])
-; CHECK-NEXT: br label %while.body
+; CHECK: br label %while.body
 
 ; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[COUNT]], %while.body.lr.ph ], [ [[LOOP_DEC:%[^ ]+]], %while.body ]
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
@@ -158,12 +163,11 @@ while.end:
 
 ; CHECK-LLC:      do_dec2
 ; CHECK-LLC-NOT:    mov lr,
-; CHECK-LLC:        dls lr,
+; CHECK-LLC:        dls lr, {{.*}}
 ; CHECK-LLC-NOT:    mov lr,
 ; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
 ; CHECK-LLC:        le lr, [[LOOP_HEADER]]
 ; CHECK-LLC-NOT:    b .
-; CHECK-LLC:      @ %while.end
 define i32 @do_dec2(i32 %n) {
 entry:
   %cmp6 = icmp sgt i32 %n, 0

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir (removed)
@@ -1,155 +0,0 @@
-# RUN: llc -mtriple=armv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
-# CHECK: entry:
-# CHECK: $lr = t2DLS
-# CHECK: for.body:
-# CHECK: $lr = t2LEUpdate renamable $lr
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main-unknown-unknown"
-  
-  ; Function Attrs: norecurse nounwind
-  define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
-  entry:
-    %cmp8 = icmp eq i32 %N, 0
-    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
-  
-  for.body.preheader:                               ; preds = %entry
-    %scevgep = getelementptr i32, i32* %a, i32 -1
-    %scevgep4 = getelementptr i32, i32* %c, i32 -1
-    %scevgep8 = getelementptr i32, i32* %b, i32 -1
-    call void @llvm.set.loop.iterations.i32(i32 %N)
-    br label %for.body
-  
-  for.cond.cleanup:                                 ; preds = %for.body, %entry
-    ret void
-  
-  for.body:                                         ; preds = %for.body, %for.body.preheader
-    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
-    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
-    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
-    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
-    %size = call i32 @llvm.arm.space(i32 4072, i32 undef)
-    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
-    %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
-    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
-    %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
-    %mul = mul nsw i32 %2, %1
-    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
-    store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
-    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
-    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
-    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
-    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
-    %4 = icmp ne i32 %3, 0
-    br i1 %4, label %for.body, label %for.cond.cleanup
-  }
-  
-  ; Function Attrs: nounwind
-  declare i32 @llvm.arm.space(i32, i32) #1
-  
-  ; Function Attrs: noduplicate nounwind
-  declare void @llvm.set.loop.iterations.i32(i32) #2
-  
-  ; Function Attrs: noduplicate nounwind
-  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
-  
-  ; Function Attrs: nounwind
-  declare void @llvm.stackprotector(i8*, i8**) #1
-  
-  attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
-  attributes #1 = { nounwind }
-  attributes #2 = { noduplicate nounwind }
-  
-  !llvm.module.flags = !{!0, !1}
-  !llvm.ident = !{!2}
-  
-  !0 = !{i32 1, !"wchar_size", i32 4}
-  !1 = !{i32 1, !"min_enum_size", i32 4}
-  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
-  !3 = !{!4, !4, i64 0}
-  !4 = !{!"int", !5, i64 0}
-  !5 = !{!"omnipotent char", !6, i64 0}
-  !6 = !{!"Simple C/C++ TBAA"}
-
-...
----
-name:            size_limit
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: false
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-  - { reg: '$r1', virtual-reg: '' }
-  - { reg: '$r2', virtual-reg: '' }
-  - { reg: '$r3', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       8
-  offsetAdjustment: 0
-  maxAlignment:    4
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.1(0x80000000)
-  
-    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
-    frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    $r7 = frame-setup tMOVr $sp, 14, $noreg
-    frame-setup CFI_INSTRUCTION def_cfa_register $r7
-    tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
-    t2IT 0, 8, implicit-def $itstate
-    tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
-    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
-    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
-    $lr = tMOVr $r3, 14, $noreg
-    t2DoLoopStart killed $r3
-  
-  bb.1.for.body:
-    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  
-    dead renamable $r3 = SPACE 4072, undef renamable $r0
-    renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
-    renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
-    renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
-    early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
-    renamable $lr = t2LoopDec killed renamable $lr, 1
-    t2LoopEnd renamable $lr, %bb.1
-    tB %bb.2, 14, $noreg
-  
-  bb.2.for.cond.cleanup:
-    tPOP_RET 14, $noreg, def $r7, def $pc
-
-...

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll?rev=364733&r1=364732&r2=364733&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll Mon Jul  1 01:21:28 2019
@@ -109,6 +109,35 @@ while.end:
   ret i32 0
 }
 
+; CHECK-LABEL: pre_existing_test_set
+; CHECK: call i1 @llvm.test.set.loop.iterations
+; CHECK-NOT: llvm.set{{.*}}.loop.iterations
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+; CHECK-NOT: call i32 @llvm.loop.decrement.reg
+define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
+entry:
+  %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
+  br i1 %guard, label %while.preheader, label %while.end
+
+while.preheader:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.preheader
+  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
+  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
+  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ]
+  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
+  %1 = load i32, i32* %q.addr.05, align 4
+  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
+  store i32 %1, i32* %p.addr.04, align 4
+  %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+  %3 = icmp ne i32 %2, 0
+  br i1 %3, label %while.body, label %while.end
+
+while.end:                                        ; preds = %while.body, %entry
+  ret i32 0
+}
+
 ; CHECK-LABEL: pre_existing_inner
 ; CHECK-NOT: llvm.set.loop.iterations
 ; CHECK: while.cond1.preheader.us:
@@ -223,14 +252,16 @@ exit:
 }
 
 ; CHECK-LABEL: search
+; CHECK: entry:
+; CHECK:   [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
+; CHECK:   br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
 ; CHECK: for.body.preheader:
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
-; CHECK: br label %for.body
+; CHECK:   br label %for.body
 ; CHECK: for.body:
 ; CHECK: for.inc:
-; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32
-; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
-; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
+; CHECK:   [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32
+; CHECK:   [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
+; CHECK:   br i1 [[CMP]], label %for.body, label %for.cond.cleanup
 define i32 @search(i8* nocapture readonly %c, i32 %N) {
 entry:
   %cmp11 = icmp eq i32 %N, 0
@@ -276,16 +307,16 @@ for.inc:
 ; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
 
 ; TODO: We should be able to support the unrolled loop body.
-; CHECK-UNROLL-LABEL: unroll_inc_int:
+; CHECK-UNROLL-LABEL: unroll_inc_int
 ; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
 ; CHECK-UNROLL-NOT: dls
 ; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
 ; CHECK-UNROLL-NOT: le lr, [[LOOP]]
 ; CHECK-UNROLL:     bne [[LOOP]]
-; CHECK-UNROLL:     %for.body.epil.preheader
-; CHECK-UNROLL:     dls
-; CHECK-UNROLL:     %for.body.epil
-; CHECK-UNROLL:     le
+; CHECK-UNROLL:     wls lr, lr, [[EXIT:.LBB[0-9_]+]]
+; CHECK-UNROLL:     [[EPIL:.LBB[0-9_]+]]:
+; CHECK-UNROLL:     le lr, [[EPIL]]
+; CHECK-UNROLL-NEXT: [[EXIT]]
 
 define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
 entry:
@@ -310,24 +341,27 @@ for.body:
 }
 
 ; CHECK-LABEL: unroll_inc_unsigned
-; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
 ; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
 
 ; CHECK-LLC-LABEL: unroll_inc_unsigned:
-; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]]
-; CHECK-LLC: le  lr
+; CHECK-LLC: wls lr, r3, [[EXIT:.LBB[0-9_]+]]
+; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
+; CHECK-LLC: le lr, [[HEADER]]
+; CHECK-LLC-NEXT: [[EXIT]]:
 
 ; TODO: We should be able to support the unrolled loop body.
-; CHECK-UNROLL-LABEL: unroll_inc_unsigned:
+; CHECK-UNROLL-LABEL: unroll_inc_unsigned
 ; CHECK-UNROLL:     [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
 ; CHECK-UNROLL-NOT: dls
 ; CHECK-UNROLL:     [[LOOP:.LBB[0-9_]+]]: @ %for.body
 ; CHECK-UNROLL-NOT: le lr, [[LOOP]]
 ; CHECK-UNROLL:     bne [[LOOP]]
-; CHECK-UNROLL:     %for.body.epil.preheader
-; CHECK-UNROLL:     dls
-; CHECK-UNROLL:     %for.body.epil
-; CHECK-UNROLL:     le
+; CHECK-UNROLL:     wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
+; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
+; CHECK-UNROLL:     le lr, [[EPIL]]
+; CHECK-UNROLL: [[EPIL_EXIT]]:
+; CHECK-UNROLL:     pop
 define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
 entry:
   %cmp8 = icmp eq i32 %N, 0
@@ -357,15 +391,21 @@ for.body:
 ; TODO: An unnecessary register is being held to hold COUNT, lr should just
 ; be used instead.
 ; CHECK-LLC-LABEL: unroll_dec_int:
-; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]]
-; CHECK-LLC: subs  [[COUNT]], #1
-; CHECK-LLC: le  lr
-
-; CHECK-UNROLL-LABEL: unroll_dec_int
-; CHECK-UNROLL: dls lr
-; CHECK-UNROLL: le lr
-; CHECK-UNROLL: dls lr
-; CHECK-UNROLL: le lr
+; CHECK-LLC: dls lr, r3
+; CHECK-LLC-NOT: mov lr, r3
+; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
+; CHECK-LLC: le lr, [[HEADER]]
+
+; CHECK-UNROLL-LABEL: unroll_dec_int:
+; CHECK-UNROLL:         wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
+; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
+; CHECK-UNROLL:         le lr, [[PROLOGUE]]
+; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT]]:
+; CHECK-UNROLL:         dls lr, lr
+; CHECK-UNROLL:      [[BODY:.LBB[0-9_]+]]:
+; CHECK-UNROLL:         le lr, [[BODY]]
+; CHECK-UNROLL-NOT:     b
+; CHECK-UNROLL:         pop
 define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
 entry:
   %cmp8 = icmp sgt i32 %N, 0
@@ -389,5 +429,6 @@ for.body:
 }
 
 declare void @llvm.set.loop.iterations.i32(i32) #0
+declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
 

Removed: llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir?rev=364732&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir (removed)
@@ -1,198 +0,0 @@
-# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops -o -
-# CHECK:      bb.1.for.body.preheader:
-# CHECK:        $lr = t2DLS
-# CHECK-NOT:    t2LoopDec
-# CHECK:      bb.6.for.inc:
-# CHECK:        $lr = t2LEUpdate renamable $lr, %bb.2
-
---- |
-  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-  target triple = "thumbv8.1m.main-unknown-unknown"
-  
-  ; Function Attrs: norecurse nounwind readonly
-  define dso_local arm_aapcscc i32 @search(i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
-  entry:
-    %cmp11 = icmp eq i32 %N, 0
-    br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
-  
-  for.body.preheader:
-    call void @llvm.set.loop.iterations.i32(i32 %N)
-    br label %for.body
-  
-  for.cond.cleanup:
-    %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
-    %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
-    %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
-    ret i32 %sub
-  
-  for.body:
-    %lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ]
-    %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ]
-    %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ]
-    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ]
-    %1 = load i8, i8* %lsr.iv1, align 1
-    %2 = zext i8 %1 to i32
-    switch i32 %2, label %for.inc [
-      i32 108, label %sw.bb
-      i32 111, label %sw.bb
-      i32 112, label %sw.bb
-      i32 32, label %sw.bb1
-    ]
-  
-  sw.bb:
-    %inc = add nsw i32 %found.012, 1
-    br label %for.inc
-  
-  sw.bb1:
-    %inc2 = add nsw i32 %spaces.013, 1
-    br label %for.inc
-  
-  for.inc:
-    %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
-    %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
-    %scevgep = getelementptr i8, i8* %lsr.iv1, i32 1
-    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
-    %4 = icmp ne i32 %3, 0
-    br i1 %4, label %for.body, label %for.cond.cleanup
-  }
-  
-  declare void @llvm.set.loop.iterations.i32(i32) #1
-  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
-  declare void @llvm.stackprotector(i8*, i8**) #2
-  
-  attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
-  attributes #1 = { noduplicate nounwind }
-  attributes #2 = { nounwind }
-
-...
----
-name:            search
-alignment:       1
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
-tracksRegLiveness: true
-hasWinCFI:       false
-registers:       []
-liveins:         
-  - { reg: '$r0', virtual-reg: '' }
-  - { reg: '$r1', virtual-reg: '' }
-frameInfo:       
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       16
-  offsetAdjustment: -8
-  maxAlignment:    4
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           
-  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
-      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
-      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-constants:       []
-machineFunctionInfo: {}
-body:             |
-  bb.0.entry:
-    successors: %bb.1(0x30000000), %bb.3(0x50000000)
-    liveins: $r0, $r1, $r4, $r6, $lr
-  
-    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r6, $r7, killed $lr
-    frame-setup CFI_INSTRUCTION def_cfa_offset 16
-    frame-setup CFI_INSTRUCTION offset $lr, -4
-    frame-setup CFI_INSTRUCTION offset $r7, -8
-    frame-setup CFI_INSTRUCTION offset $r6, -12
-    frame-setup CFI_INSTRUCTION offset $r4, -16
-    $r7 = frame-setup t2ADDri $sp, 8, 14, $noreg, $noreg
-    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
-    t2CMPri $r1, 0, 14, $noreg, implicit-def $cpsr
-    t2Bcc %bb.1, 0, killed $cpsr
-  
-  bb.3.for.body.preheader:
-    successors: %bb.4(0x80000000)
-    liveins: $r0, $r1
-  
-    $lr = tMOVr $r1, 14, $noreg
-    t2DoLoopStart killed $r1
-    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
-    renamable $r12 = t2MOVi 1, 14, $noreg, $noreg
-    renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
-  
-  bb.4.for.body:
-    successors: %bb.5(0x26666665), %bb.6(0x5999999b)
-    liveins: $lr, $r0, $r1, $r2, $r12
-  
-    renamable $r3 = t2LDRBi12 renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1)
-    renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg
-    renamable $lr = t2LoopDec killed renamable $lr, 1
-    t2CMPri renamable $r4, 4, 14, $noreg, implicit-def $cpsr
-    t2Bcc %bb.5, 8, killed $cpsr
-  
-  bb.6.for.body:
-    successors: %bb.7(0x6db6db6e), %bb.5(0x12492492)
-    liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r12
-  
-    renamable $r4 = t2LSLrr renamable $r12, killed renamable $r4, 14, $noreg, $noreg
-    t2TSTri killed renamable $r4, 25, 14, $noreg, implicit-def $cpsr
-    t2Bcc %bb.5, 0, killed $cpsr
-  
-  bb.7.sw.bb:
-    successors: %bb.8(0x80000000)
-    liveins: $lr, $r0, $r1, $r2, $r12
-  
-    renamable $r2 = nsw t2ADDri killed renamable $r2, 1, 14, $noreg, $noreg
-    t2B %bb.8, 14, $noreg
-  
-  bb.5.for.body:
-    successors: %bb.8(0x80000000)
-    liveins: $lr, $r0, $r1, $r2, $r3, $r12
-  
-    t2CMPri killed renamable $r3, 32, 14, $noreg, implicit-def $cpsr
-    BUNDLE implicit-def dead $itstate, implicit-def $r1, implicit killed $r1, implicit killed $cpsr {
-      t2IT 0, 8, implicit-def $itstate
-      renamable $r1 = nsw t2ADDri killed renamable $r1, 1, 0, killed $cpsr, $noreg, implicit $r1, implicit internal killed $itstate
-    }
-  
-  bb.8.for.inc:
-    successors: %bb.4(0x7c000000), %bb.2(0x04000000)
-    liveins: $lr, $r0, $r1, $r2, $r12
-  
-    renamable $r0 = t2ADDri killed renamable $r0, 1, 14, $noreg, $noreg
-    t2LoopEnd renamable $lr, %bb.4
-    t2B %bb.2, 14, $noreg
-  
-  bb.2.for.cond.cleanup:
-    liveins: $r1, $r2
-  
-    renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
-    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
-  
-  bb.1:
-    renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
-    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
-    renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
-    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
-
-...




More information about the llvm-commits mailing list