[llvm] r364288 - [ARM] DLS/LE low-overhead loop code generation

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 25 03:45:52 PDT 2019


Author: sam_parker
Date: Tue Jun 25 03:45:51 2019
New Revision: 364288

URL: http://llvm.org/viewvc/llvm-project?rev=364288&view=rev
Log:
[ARM] DLS/LE low-overhead loop code generation

Introduce three pseudo instructions to be used during DAG ISel to
represent v8.1-m low-overhead loops. One maps to set_loop_iterations
while loop_decrement_reg is lowered to two, so that we can separate
the decrement and branching operations. The pseudo instructions are
expanded pre-emission, where we can still decide whether we actually
want to generate a low-overhead loop, in a new pass:
ARMLowOverheadLoops. The pass currently bails, reverting to a sub,
icmp and br, in the cases where a call or stack spill/restore happens
between the decrement and branching instructions, or if the loop is
too large.

Differential Revision: https://reviews.llvm.org/D63476

Added:
    llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
    llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir
    llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir
Modified:
    llvm/trunk/lib/Target/ARM/ARM.h
    llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
    llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
    llvm/trunk/lib/Target/ARM/CMakeLists.txt
    llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
    llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll
    llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll
    llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll

Modified: llvm/trunk/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.h?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.h (original)
+++ llvm/trunk/lib/Target/ARM/ARM.h Tue Jun 25 03:45:51 2019
@@ -35,7 +35,7 @@ class MachineInstr;
 class MCInst;
 class PassRegistry;
 
-
+FunctionPass *createARMLowOverheadLoopsPass();
 Pass *createARMParallelDSPPass();
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
@@ -66,6 +66,7 @@ void initializeARMExpandPseudoPass(PassR
 void initializeThumb2SizeReducePass(PassRegistry &);
 void initializeThumb2ITBlockPass(PassRegistry &);
 void initializeMVEVPTBlockPass(PassRegistry &);
+void initializeARMLowOverheadLoopsPass(PassRegistry &);
 
 } // end namespace llvm
 

Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Tue Jun 25 03:45:51 2019
@@ -2986,6 +2986,36 @@ void ARMDAGToDAGISel::Select(SDNode *N)
     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
 
     if (InFlag.getOpcode() == ARMISD::CMPZ) {
+      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+        SDValue Int = InFlag.getOperand(0);
+        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+
+        // Handle low-overhead loops.
+        if (ID == Intrinsic::loop_decrement_reg) {
+          SDValue Elements = Int.getOperand(2);
+          SDValue Size = CurDAG->getTargetConstant(
+            cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
+                                 MVT::i32);
+
+          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
+          SDNode *LoopDec =
+            CurDAG->getMachineNode(ARM::t2LoopDec, dl,
+                                   CurDAG->getVTList(MVT::i32, MVT::Other),
+                                   Args);
+          ReplaceUses(Int.getNode(), LoopDec);
+
+          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
+          SDNode *LoopEnd =
+            CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
+
+          ReplaceUses(N, LoopEnd);
+          CurDAG->RemoveDeadNode(N);
+          CurDAG->RemoveDeadNode(InFlag.getNode());
+          CurDAG->RemoveDeadNode(Int.getNode());
+          return;
+        }
+      }
+
       bool SwitchEQNEToPLMI;
       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
       InFlag = N->getOperand(4);

Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Tue Jun 25 03:45:51 2019
@@ -5135,6 +5135,7 @@ class t2LOL<dag oops, dag iops, string a
   let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB];
 }
 
+let isNotDuplicable = 1 in {
 def t2WLS : t2LOL<(outs GPRlr:$LR),
                   (ins rGPR:$Rn, wlslabel_u11:$label),
                   "wls", "$LR, $Rn, $label"> {
@@ -5178,6 +5179,21 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u
   let Inst{10-1} = label{10-1};
 }
 
+def t2DoLoopStart :
+  t2PseudoInst<(outs), (ins rGPR:$elts), 4, IIC_Br,
+  [(int_set_loop_iterations rGPR:$elts)]>, Sched<[WriteBr]>;
+
+def t2LoopDec :
+  t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
+               4, IIC_Br, []>, Sched<[WriteBr]>;
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 1 in
+def t2LoopEnd :
+  t2PseudoInst<(outs), (ins GPRlr:$elts, brtarget:$target),
+  8, IIC_Br, []>, Sched<[WriteBr]>;
+
+} // end isNotDuplicable
+
 class CS<string iname, bits<4> opcode, list<dag> pattern=[]>
   : V8_1MI<(outs rGPR:$Rd), (ins GPRwithZR:$Rn, GPRwithZR:$Rm, pred_noal:$fcond),
            AddrModeNone, NoItinerary, iname, "$Rd, $Rn, $Rm, $fcond", "", pattern> {

Added: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp?rev=364288&view=auto
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp (added)
+++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp Tue Jun 25 03:45:51 2019
@@ -0,0 +1,295 @@
+//===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Finalize v8.1-m low-overhead loops by converting the associated pseudo
+/// instructions into machine operations.
+/// The expectation is that the loop contains three pseudo instructions:
+/// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
+///   form should be in the preheader, whereas the while form should be in the
+///   preheader's only predecessor. TODO: Could DoLoopStart get moved into the
+///   pre-preheader?
+/// - t2LoopDec - placed within the loop body.
+/// - t2LoopEnd - the loop latch terminator.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMBasicBlockInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-low-overhead-loops"
+#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
+
+namespace {
+  /// Expands the t2DoLoopStart, t2LoopDec and t2LoopEnd loop pseudos.
+  class ARMLowOverheadLoops : public MachineFunctionPass {
+    const ARMBaseInstrInfo    *TII = nullptr;
+    MachineRegisterInfo       *MRI = nullptr;
+    std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
+
+  public:
+    static char ID;
+
+    ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
+    /// Requires MachineLoopInfo and declares that the CFG is preserved.
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      AU.addRequired<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    /// Runs ProcessLoop on every top-level loop; returns true on any change.
+    bool runOnMachineFunction(MachineFunction &MF) override;
+    /// Handles the sub-loops of \p ML, then finds and expands its own pseudos.
+    bool ProcessLoop(MachineLoop *ML);
+    /// Emits t2DLS/t2LEUpdate, or sub/cmp/bne instead when \p Revert is set.
+    void Expand(MachineLoop *ML, MachineInstr *Start,
+                MachineInstr *Dec, MachineInstr *End, bool Revert);
+    /// The pass requires a function in which no virtual registers remain.
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    StringRef getPassName() const override {
+      return ARM_LOW_OVERHEAD_LOOPS_NAME;
+    }
+  };
+} // end anonymous namespace
+  
+char ARMLowOverheadLoops::ID = 0;
+
+INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
+                false, false)
+
+bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
+  // Pass entry point. NOTE(review): the LOB bail-out below is disabled, so the
+  // pass runs for every subtarget - confirm this is intended:
+  //   if (!hasLOB()) return false;  // via static_cast<const ARMSubtarget&>
+  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n");
+
+  auto &MLI = getAnalysis<MachineLoopInfo>();
+  MRI = &MF.getRegInfo();
+  TII = static_cast<const ARMBaseInstrInfo*>(
+    MF.getSubtarget().getInstrInfo());
+  BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(MF));
+  BBUtils->computeAllBlockSizes();
+  // ProcessLoop recurses into sub-loops, so only start from top-level loops.
+  bool Changed = false;
+  for (auto ML : MLI) {
+    if (!ML->getParentLoop())
+      Changed |= ProcessLoop(ML);
+  }
+  return Changed;
+}
+
+/// Locate the t2DoLoopStart, t2LoopDec and t2LoopEnd pseudos belonging to
+/// \p ML (inner loops are handled first) and expand them. The loop is
+/// reverted to sub/cmp/br if a call or a load/store of LR is seen once the
+/// LoopDec has been found, or if the header is out of range for LE.
+/// \returns true if this loop or any of its sub-loops was modified.
+bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
+  bool Changed = false;
+
+  // Process inner loops first.
+  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
+    Changed |= ProcessLoop(*I);
+
+  LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
+
+  // Return the first t2DoLoopStart in MBB, or nullptr if there is none.
+  auto SearchForStart = [](MachineBasicBlock *MBB) -> MachineInstr* {
+    for (auto &MI : *MBB) {
+      if (MI.getOpcode() == ARM::t2DoLoopStart)
+        return &MI;
+    }
+    return nullptr;
+  };
+
+  MachineInstr *Start = nullptr;
+  MachineInstr *Dec = nullptr;
+  MachineInstr *End = nullptr;
+  bool Revert = false;
+
+  if (auto *Preheader = ML->getLoopPreheader())
+    Start = SearchForStart(Preheader);
+
+  // Find the low-overhead loop components and decide whether or not to fall
+  // back to a normal loop.
+  for (auto *MBB : reverse(ML->getBlocks())) {
+    for (auto &MI : *MBB) {
+      if (MI.getOpcode() == ARM::t2LoopDec)
+        Dec = &MI;
+      else if (MI.getOpcode() == ARM::t2LoopEnd)
+        End = &MI;
+
+      if (!Dec)
+        continue;
+
+      // TODO: Though the call will require LE to execute again, does this
+      // mean we should revert? Always executing LE hopefully should be faster
+      // than performing a sub,cmp,br or even subs,br.
+      if (MI.getDesc().isCall())
+        Revert = true;
+
+      // If we find that we load/store LR between LoopDec and LoopEnd, expect
+      // that the decremented value has been spilled to the stack. Because
+      // this value isn't actually going to be produced until the latch, by LE,
+      // we would need to generate a real sub. The value is also likely to be
+      // reloaded for use of LoopEnd - in which case we'd need to perform an
+      // add because it gets negated again by LE! The other option is to then
+      // generate the other form of LE which doesn't perform the sub.
+      // Never clear Revert here: a later, unrelated access must not undo it.
+      if ((MI.mayLoad() || MI.mayStore()) && MI.getNumOperands() > 0 &&
+          MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == ARM::LR)
+        Revert = true;
+    }
+
+    if (Dec && End && Revert)
+      break;
+  }
+
+  if (!Start && !Dec && !End) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
+    return Changed;
+  }
+  if (!Start || !Dec || !End)
+    report_fatal_error("Failed to find all loop components");
+
+  if (!End->getOperand(1).isMBB() ||
+      End->getOperand(1).getMBB() != ML->getHeader())
+    report_fatal_error("Expected LoopEnd to target Loop Header");
+
+  // The LE instruction has 12 bits for the label offset.
+  if (!BBUtils->isBBInRange(End, ML->getHeader(), 4096)) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Too large for a low-overhead loop!\n");
+    Revert = true;
+  }
+
+  LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start
+                    << " - Found Loop Dec: " << *Dec
+                    << " - Found Loop End: " << *End);
+
+  Expand(ML, Start, Dec, End, Revert);
+  return true;
+}
+
+/// Replace the loop pseudos of \p ML with real instructions. If \p Revert is
+/// set, \p Start is erased and \p Dec / \p End become a sub, cmp and bne;
+/// otherwise \p Start becomes a DLS and \p Dec / \p End are folded into an LE.
+void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
+                                 MachineInstr *Dec, MachineInstr *End,
+                                 bool Revert) {
+  auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
+    // The trip count should already be held in LR since the instructions
+    // within the loop can only read and write to LR. So, there should be a
+    // mov to setup the count. WLS/DLS perform this move, so find the original
+    // and delete it - inserting WLS/DLS in its place.
+    MachineBasicBlock *MBB = Start->getParent();
+    MachineInstr *InsertPt = Start;
+    for (auto &I : MRI->def_instructions(ARM::LR)) {
+      // Only handle a register move in the same block: the trip count needs
+      // moving into a reg before the setup instruction anyway. Check this
+      // before reading operands, as other LR defs may have fewer of them.
+      if (I.getParent() != MBB || !I.getDesc().isMoveReg() ||
+          I.getNumOperands() < 3)
+        continue;
+      // The move must always execute and must copy the trip count register.
+      if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() != ARMCC::AL ||
+          !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
+        continue;
+      InsertPt = &I;
+      break;
+    }
+
+    MachineInstrBuilder MIB =
+      BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(ARM::t2DLS));
+    if (InsertPt != Start)
+      InsertPt->eraseFromParent();
+
+    MIB.addDef(ARM::LR);
+    MIB.add(Start->getOperand(0));
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted DLS: " << *MIB);
+    Start->eraseFromParent();
+  };
+
+  // Combine the LoopDec and LoopEnd instructions into LE(TP).
+  auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
+                              MachineInstr *End) {
+    MachineBasicBlock *Latch = End->getParent();
+    MachineInstrBuilder MIB = BuildMI(*Latch, End, End->getDebugLoc(),
+                                      TII->get(ARM::t2LEUpdate));
+    MIB.addDef(ARM::LR);
+    MIB.add(End->getOperand(0));
+    MIB.add(End->getOperand(1));
+    LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
+
+    // If a branch to the fallthrough exit block follows the loop end, remove
+    // it - but only once it is known to be an unconditional branch to Exit.
+    MachineInstr *Terminator = &Latch->instr_back();
+    MachineBasicBlock *Exit = ML->getExitBlock();
+    if (End != Terminator && Exit && Latch->isLayoutSuccessor(Exit) &&
+        Terminator->isUnconditionalBranch() &&
+        Terminator->getOperand(0).isMBB() &&
+        Terminator->getOperand(0).getMBB() == Exit) {
+      LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop exit branch: "
+                 << *Terminator);
+      Terminator->eraseFromParent();
+    }
+    End->eraseFromParent();
+    Dec->eraseFromParent();
+  };
+
+  // Generate a subs, or sub and cmp, and a branch instead of an LE.
+  // TODO: Check flags so that we can possibly generate a subs.
+  auto ExpandBranch = [this](MachineInstr *Dec, MachineInstr *End) {
+    LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub, cmp, br.\n");
+    // Create sub
+    MachineBasicBlock *MBB = Dec->getParent();
+    MachineInstrBuilder MIB = BuildMI(*MBB, Dec, Dec->getDebugLoc(),
+                                      TII->get(ARM::t2SUBri));
+    MIB.addDef(ARM::LR);
+    MIB.add(Dec->getOperand(1));
+    MIB.add(Dec->getOperand(2));
+    MIB.addImm(ARMCC::AL);
+    MIB.addReg(0);
+    MIB.addReg(0);
+
+    // Create cmp
+    MBB = End->getParent();
+    MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2CMPri));
+    MIB.addReg(ARM::LR);
+    MIB.addImm(0);
+    MIB.addImm(ARMCC::AL);
+
+    // Create bne
+    MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc));
+    MIB.add(End->getOperand(1));  // branch target
+    MIB.addImm(ARMCC::NE);        // condition code
+    End->eraseFromParent();
+    Dec->eraseFromParent();
+  };
+
+  if (Revert) {
+    Start->eraseFromParent();
+    ExpandBranch(Dec, End);
+  } else {
+    ExpandLoopStart(ML, Start);
+    ExpandLoopEnd(ML, Dec, End);
+  }
+}
+/// Factory for the pass; ARMPassConfig::addPreEmitPass adds the result.
+FunctionPass *llvm::createARMLowOverheadLoopsPass() {
+  return new ARMLowOverheadLoops();
+}

Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Tue Jun 25 03:45:51 2019
@@ -96,6 +96,7 @@ extern "C" void LLVMInitializeARMTarget(
   initializeARMExpandPseudoPass(Registry);
   initializeThumb2SizeReducePass(Registry);
   initializeMVEVPTBlockPass(Registry);
+  initializeARMLowOverheadLoopsPass(Registry);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -446,6 +447,9 @@ bool ARMPassConfig::addPreISel() {
                                   MergeExternalByDefault));
   }
 
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createHardwareLoopsPass());
+
   return false;
 }
 
@@ -526,4 +530,5 @@ void ARMPassConfig::addPreEmitPass() {
     addPass(createARMOptimizeBarriersPass());
 
   addPass(createARMConstantIslandPass());
+  addPass(createARMLowOverheadLoopsPass());
 }

Modified: llvm/trunk/lib/Target/ARM/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/CMakeLists.txt?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/ARM/CMakeLists.txt Tue Jun 25 03:45:51 2019
@@ -39,6 +39,7 @@ add_llvm_target(ARMCodeGen
   ARMLegalizerInfo.cpp
   ARMParallelDSP.cpp
   ARMLoadStoreOptimizer.cpp
+  ARMLowOverheadLoops.cpp
   ARMMCInstLower.cpp
   ARMMachineFunctionInfo.cpp
   ARMMacroFusion.cpp

Modified: llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll Tue Jun 25 03:45:51 2019
@@ -49,6 +49,10 @@
 ; CHECK-NEXT:      Dominator Tree Construction
 ; CHECK-NEXT:      Exception handling preparation
 ; CHECK-NEXT:      Merge internal globals
+; CHECK-NEXT:      Dominator Tree Construction
+; CHECK-NEXT:      Natural Loop Information
+; CHECK-NEXT:      Scalar Evolution Analysis
+; CHECK-NEXT:      Hardware Loop Insertion
 ; CHECK-NEXT:      Safe Stack instrumentation pass
 ; CHECK-NEXT:      Insert stack protectors
 ; CHECK-NEXT:      Module Verifier
@@ -138,6 +142,9 @@
 ; CHECK-NEXT:      Unpack machine instruction bundles
 ; CHECK-NEXT:      optimise barriers pass
 ; CHECK-NEXT:      ARM constant island placement and branch shortening pass
+; CHECK-NEXT:      MachineDominator Tree Construction
+; CHECK-NEXT:      Machine Natural Loop Construction
+; CHECK-NEXT:      ARM Low Overhead Loops pass
 ; CHECK-NEXT:      Contiguously Lay Out Funclets
 ; CHECK-NEXT:      StackMap Liveness Analysis
 ; CHECK-NEXT:      Live DEBUG_VALUE analysis

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/calls.ll Tue Jun 25 03:45:51 2019
@@ -3,7 +3,7 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
-
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+lob,+mve.fp -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
 
 ; CHECK-LABEL: skip_call
 ; CHECK-NOT: call void @llvm.set.loop.iterations
@@ -41,6 +41,15 @@ while.end:
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK: br i1 [[CMP]], label %loop, label %exit
 
+; CHECK-LLC-LABEL: test_target_specific:
+; CHECK-LLC:        mov.w lr, #50
+; CHECK-LLC:        dls lr, lr
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b .
+; CHECK-LLC:      @ %exit
+
 define i32 @test_target_specific(i32* %a, i32* %b) {
 entry:
   br label %loop
@@ -86,6 +95,17 @@ exit:
 ; CHECK-MVE-NOT:  call void @llvm.set.loop.iterations
 ; CHECK-FP:       call void @llvm.set.loop.iterations.i32(i32 100)
 ; CHECK-MVEFP:    call void @llvm.set.loop.iterations.i32(i32 100)
+
+; CHECK-LLC-LABEL: test_fabs:
+; CHECK-LLC:        mov.w lr, #100
+; CHECK-LLC:        dls lr, lr
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC-NOT:    bl
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b .
+; CHECK-LLC:      @ %exit
+
 define float @test_fabs(float* %a) {
 entry:
   br label %loop

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/cond-mov.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,115 @@
+# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: $lr = tMOVr $r0, 13, $noreg
+# CHECK: $lr = t2DLS killed $r0
+# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main"
+  
+  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
+  entry:
+    %scevgep = getelementptr i32, i32* %q, i32 -1
+    %scevgep3 = getelementptr i32, i32* %p, i32 -1
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %while.body
+  
+  while.body:
+    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
+    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
+    %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
+    %scevgep2 = getelementptr i32, i32* %lsr.iv, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv4, i32 1
+    %1 = load i32, i32* %scevgep2, align 4
+    store i32 %1, i32* %scevgep6, align 4
+    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
+    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
+    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %3 = icmp ne i32 %2, 0
+    br i1 %3, label %while.body, label %while.end
+  
+  while.end:
+    ret i32 0
+  }
+  
+  declare void @llvm.set.loop.iterations.i32(i32) #0
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
+  declare void @llvm.stackprotector(i8*, i8**) #1
+  
+  attributes #0 = { noduplicate nounwind }
+  attributes #1 = { nounwind }
+
+...
+---
+name:            do_copy
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $r0, $r1, $r2, $r7, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $lr = tMOVr $r0, 13, $noreg
+    t2DoLoopStart killed $r0
+    renamable $r0 = t2SUBri killed renamable $r1, 4, 14, $noreg, $noreg
+    renamable $r1 = t2SUBri killed renamable $r2, 4, 14, $noreg, $noreg
+  
+  bb.1.while.body:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r0, $r1
+  
+    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep2)
+    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep6)
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.1
+    t2B %bb.2, 14, $noreg
+  
+  bb.2.while.end:
+    $r0 = t2MOVi 0, 14, $noreg, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r7, def $pc, implicit killed $r0
+
+...

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/massive.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,145 @@
+# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: for.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  ; Function Attrs: norecurse nounwind
+  define dso_local arm_aapcscc void @massive(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+  entry:
+    %cmp8 = icmp eq i32 %N, 0
+    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr i32, i32* %a, i32 -1
+    %scevgep4 = getelementptr i32, i32* %c, i32 -1
+    %scevgep8 = getelementptr i32, i32* %b, i32 -1
+    call void @llvm.set.loop.iterations.i32(i32 %N)
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+  
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+    %size = call i32 @llvm.arm.space(i32 4096, i32 undef)
+    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
+    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
+    %mul = mul nsw i32 %2, %1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+    store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
+    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  }
+  
+  declare i32 @llvm.arm.space(i32, i32) #1
+  declare void @llvm.set.loop.iterations.i32(i32) #2
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  attributes #1 = { nounwind }
+  attributes #2 = { noduplicate nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            massive
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $r7 = frame-setup tMOVr $sp, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_register $r7
+    tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
+    t2IT 0, 8, implicit-def $itstate
+    tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
+    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
+    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
+    $lr = tMOVr $r3, 14, $noreg
+    t2DoLoopStart killed $r3
+  
+  bb.1.for.body:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  
+    dead renamable $r3 = SPACE 4096, undef renamable $r0
+    renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
+    renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
+    renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
+    early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.1
+    tB %bb.2, 14, $noreg
+  
+  bb.2.for.cond.cleanup:
+    tPOP_RET 14, $noreg, def $r7, def $pc
+
+...

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/multiblock-massive.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple=armv8.1m.main -mattr=+lob -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: for.body:
+# CHECK-NOT: t2DLS
+# CHECK-NOT: t2LEUpdate
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr {
+  entry:
+    %cmp8 = icmp eq i32 %N, 0
+    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.end, %entry
+    ret void
+  
+  for.body:                                         ; preds = %for.body.preheader, %for.end
+    %lsr.iv4 = phi i32* [ %b, %for.body.preheader ], [ %scevgep5, %for.end ]
+    %lsr.iv2 = phi i32* [ %c, %for.body.preheader ], [ %scevgep3, %for.end ]
+    %lsr.iv1 = phi i32* [ %a, %for.body.preheader ], [ %scevgep, %for.end ]
+    %lsr.iv = phi i32 [ %N, %for.body.preheader ], [ %lsr.iv.next, %for.end ]
+    %size = call i32 @llvm.arm.space(i32 3072, i32 undef)
+    %0 = load i32, i32* %lsr.iv4, align 4, !tbaa !3
+    %1 = load i32, i32* %lsr.iv2, align 4, !tbaa !3
+    %mul = mul nsw i32 %1, %0
+    store i32 %mul, i32* %lsr.iv1, align 4, !tbaa !3
+    %cmp = icmp ne i32 %0, 0
+    br i1 %cmp, label %middle.block, label %for.end
+  
+  middle.block:                                     ; preds = %for.body
+    %div = udiv i32 %1, %0
+    store i32 %div, i32* %lsr.iv1, align 4, !tbaa !3
+    %size.1 = call i32 @llvm.arm.space(i32 1024, i32 undef)
+    br label %for.end
+  
+  for.end:                                          ; preds = %middle.block, %for.body
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv2, i32 1
+    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
+    %exitcond = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond, label %for.cond.cleanup, label %for.body
+  }
+  
+  declare i32 @llvm.arm.space(i32, i32) #1
+  attributes #1 = { nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            size_limit
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: -8
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+  
+    frame-setup tPUSH 14, $noreg, killed $r4, killed $r6, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r6, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    $r7 = frame-setup tADDrSPi $sp, 2, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+    tCBNZ $r3, %bb.3
+  
+  bb.1.for.cond.cleanup:
+    tPOP_RET 14, $noreg, def $r4, def $r6, def $r7, def $pc
+  
+  bb.2.for.end:
+    successors: %bb.1(0x04000000), %bb.3(0x7c000000)
+  
+    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg
+    renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 4, 14, $noreg
+    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg
+    renamable $r3, $cpsr = tSUBi8 killed renamable $r3, 1, 14, $noreg
+    tBcc %bb.1, 0, killed $cpsr
+  
+  bb.3.for.body:
+    successors: %bb.4(0x50000000), %bb.2(0x30000000)
+  
+    dead renamable $r12 = SPACE 3072, undef renamable $r0
+    renamable $r12 = t2LDRi12 renamable $r1, 0, 14, $noreg :: (load 4 from %ir.lsr.iv4, !tbaa !3)
+    renamable $lr = t2LDRi12 renamable $r2, 0, 14, $noreg :: (load 4 from %ir.lsr.iv2, !tbaa !3)
+    t2CMPri renamable $r12, 0, 14, $noreg, implicit-def $cpsr
+    renamable $r4 = nsw t2MUL renamable $lr, renamable $r12, 14, $noreg
+    tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
+    t2Bcc %bb.2, 0, killed $cpsr
+  
+  bb.4.middle.block:
+    successors: %bb.2(0x80000000)
+  
+    renamable $r4 = t2UDIV killed renamable $lr, killed renamable $r12, 14, $noreg
+    tSTRi killed renamable $r4, renamable $r0, 0, 14, $noreg :: (store 4 into %ir.lsr.iv1, !tbaa !3)
+    dead renamable $r4 = SPACE 1024, undef renamable $r0
+    t2B %bb.2, 14, $noreg
+
+...

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-call.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,141 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s
+
+# CHECK: .LBB0_2:
+# CHECK:    sub.w  lr, lr, #1
+# CHECK:    mov [[TMP:r[0-9]+]], lr
+# CHECK:    bl  bar
+# CHECK:    mov lr, [[TMP]]
+# CHECK:    cmp.w lr, #0
+# CHECK:    bne{{.*}} .LBB0_2
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-arm-none-eabi"
+  
+  define i32 @skip_call(i32 %n) #0 {
+  entry:
+    %cmp6 = icmp eq i32 %n, 0
+    br i1 %cmp6, label %while.end, label %while.body.preheader
+  
+  while.body.preheader:                             ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %while.body
+  
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
+    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
+    %add = add nsw i32 %call, %res.07
+    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %2 = icmp ne i32 %1, 0
+    br i1 %2, label %while.body, label %while.end
+  
+  while.end:                                        ; preds = %while.body, %entry
+    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
+    ret i32 %res.0.lcssa
+  }
+  
+  declare i32 @bar(...) local_unnamed_addr #0
+  declare void @llvm.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  declare void @llvm.stackprotector(i8*, i8**) #2
+  
+  attributes #0 = { "target-features"="+mve.fp" }
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+
+...
+---
+name:            skip_call
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+    liveins: $r0, $r4, $r5, $r7, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    t2CMPri $r0, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 0, killed $cpsr
+  
+  bb.3.while.body.preheader:
+    successors: %bb.4(0x80000000)
+    liveins: $r0
+  
+    $lr = tMOVr $r0, 14, $noreg
+    renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+    t2DoLoopStart killed $r0
+  
+  bb.4.while.body:
+    successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r4
+  
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    $r5 = tMOVr killed $lr, 14, $noreg
+    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0
+    $lr = tMOVr killed $r5, 14, $noreg
+    renamable $r4 = nsw t2ADDrr killed renamable $r0, killed renamable $r4, 14, $noreg, $noreg
+    t2LoopEnd renamable $lr, %bb.4
+    t2B %bb.2, 14, $noreg
+  
+  bb.2.while.end:
+    liveins: $r4
+  
+    $r0 = tMOVr killed $r4, 14, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+  
+  bb.1:
+    renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+    $r0 = tMOVr killed $r4, 14, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+...

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,139 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s
+
+# CHECK: .LBB0_2:
+# CHECK:    sub.w  lr, lr, #1
+# CHECK:    str.w lr, [sp, #12]
+# CHECK:    ldr.w lr, [sp, #12]
+# CHECK:    cmp.w lr, #0
+# CHECK:    bne{{.*}} .LBB0_2
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-arm-none-eabi"
+  
+  define i32 @skip_spill(i32 %n) #0 {
+  entry:
+    %cmp6 = icmp eq i32 %n, 0
+    br i1 %cmp6, label %while.end, label %while.body.preheader
+  
+  while.body.preheader:                             ; preds = %entry
+    call void @llvm.set.loop.iterations.i32(i32 %n)
+    br label %while.body
+  
+  while.body:                                       ; preds = %while.body, %while.body.preheader
+    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
+    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
+    %add = add nsw i32 %call, %res.07
+    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %2 = icmp ne i32 %1, 0
+    br i1 %2, label %while.body, label %while.end
+  
+  while.end:                                        ; preds = %while.body, %entry
+    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
+    ret i32 %res.0.lcssa
+  }
+  
+  declare i32 @bar(...) local_unnamed_addr #0
+  declare void @llvm.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  declare void @llvm.stackprotector(i8*, i8**) #2
+  
+  attributes #0 = { "target-features"="+mve.fp" }
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+
+...
+---
+name:            skip_spill
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+    liveins: $r0, $r4, $r5, $r7, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r5, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    t2CMPri $r0, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 0, killed $cpsr
+  
+  bb.3.while.body.preheader:
+    successors: %bb.4(0x80000000)
+    liveins: $r0
+  
+    $lr = tMOVr $r0, 14, $noreg
+    renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+    t2DoLoopStart killed $r0
+  
+  bb.4.while.body:
+    successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r4
+  
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2STRi12 $lr, %stack.0, 0, 14, $noreg :: (store 4)
+    $lr = t2LDRi12 %stack.0, 0, 14, $noreg :: (load 4)
+    renamable $r4 = nsw t2ADDrr renamable $lr, killed renamable $r4, 14, $noreg, $noreg
+    t2LoopEnd renamable $lr, %bb.4
+    t2B %bb.2, 14, $noreg
+  
+  bb.2.while.end:
+    liveins: $r4
+  
+    $r0 = tMOVr killed $r4, 14, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+  
+  bb.1:
+    renamable $r4 = t2MOVi 0, 14, $noreg, $noreg
+    $r0 = tMOVr killed $r4, 14, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0
+
+...

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/simple-do.ll Tue Jun 25 03:45:51 2019
@@ -1,6 +1,7 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=-lob -hardware-loops %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
 
 ; DISABLED-NOT: llvm.set.loop.iterations
 ; DISABLED-NOT: llvm.loop.decrement
@@ -15,6 +16,15 @@
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK: br i1 [[CMP]], label %while.body, label %while.end
+
+; CHECK-LLC-LABEL:do_copy:
+; CHECK-LLC-NOT:    mov lr, r0
+; CHECK-LLC:        dls lr, r0
+; CHECK-LLC-NOT:    mov lr, r0
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9_]+]]
+; CHECK-LLC:      @ %while.end
 define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
 entry:
   br label %while.body
@@ -45,6 +55,14 @@ while.end:
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
 
+; CHECK-LLC-LABEL:do_inc1:
+; CHECK-LLC:        dls lr,
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9_]+]]
+; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9_]+]]:
+
 define i32 @do_inc1(i32 %n) {
 entry:
   %cmp7 = icmp eq i32 %n, 0
@@ -84,6 +102,16 @@ while.end:
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
+
+; CHECK-LLC-LABEL:do_inc2:
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:        dls lr,
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9_]+]]
+; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9_]+]]:
+
 define i32 @do_inc2(i32 %n) {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
@@ -127,6 +155,15 @@ while.end:
 ; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[REM]], i32 1)
 ; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
 ; CHECK: br i1 [[CMP]], label %while.body, label %while.end.loopexit
+
+; CHECK-LLC-LABEL:do_dec2:
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:        dls lr,
+; CHECK-LLC-NOT:    mov lr,
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9_]+]]:
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b .
+; CHECK-LLC:      @ %while.end
 define i32 @do_dec2(i32 %n) {
 entry:
   %cmp6 = icmp sgt i32 %n, 0

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/size-limit.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,155 @@
+# RUN: llc -mtriple=armv8.1m.main -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
+# CHECK: entry:
+# CHECK: $lr = t2DLS
+# CHECK: for.body:
+# CHECK: $lr = t2LEUpdate renamable $lr
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  ; Function Attrs: norecurse nounwind
+  define dso_local arm_aapcscc void @size_limit(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+  entry:
+    %cmp8 = icmp eq i32 %N, 0
+    br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr i32, i32* %a, i32 -1
+    %scevgep4 = getelementptr i32, i32* %c, i32 -1
+    %scevgep8 = getelementptr i32, i32* %b, i32 -1
+    call void @llvm.set.loop.iterations.i32(i32 %N)
+    br label %for.body
+  
+  for.cond.cleanup:                                 ; preds = %for.body, %entry
+    ret void
+  
+  for.body:                                         ; preds = %for.body, %for.body.preheader
+    %lsr.iv9 = phi i32* [ %scevgep8, %for.body.preheader ], [ %scevgep10, %for.body ]
+    %lsr.iv5 = phi i32* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi i32* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.body ]
+    %size = call i32 @llvm.arm.space(i32 4072, i32 undef)
+    %scevgep11 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %1 = load i32, i32* %scevgep11, align 4, !tbaa !3
+    %scevgep7 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %2 = load i32, i32* %scevgep7, align 4, !tbaa !3
+    %mul = mul nsw i32 %2, %1
+    %scevgep3 = getelementptr i32, i32* %lsr.iv1, i32 1
+    store i32 %mul, i32* %scevgep3, align 4, !tbaa !3
+    %scevgep2 = getelementptr i32, i32* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr i32, i32* %lsr.iv5, i32 1
+    %scevgep10 = getelementptr i32, i32* %lsr.iv9, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  }
+  
+  ; Function Attrs: nounwind
+  declare i32 @llvm.arm.space(i32, i32) #1
+  
+  ; Function Attrs: noduplicate nounwind
+  declare void @llvm.set.loop.iterations.i32(i32) #2
+  
+  ; Function Attrs: noduplicate nounwind
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
+  
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+  
+  attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
+  attributes #1 = { nounwind }
+  attributes #2 = { noduplicate nounwind }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 1, !"min_enum_size", i32 4}
+  !2 = !{!"clang version 9.0.0 (http://llvm.org/git/clang.git a9c7c0fc5d468f3d18a5c6beb697ab0d5be2ff4c) (http://llvm.org/git/llvm.git f34bff0c141a04a5182d57e2cfb1e4bc582c81b0)"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"int", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+
+...
+---
+name:            size_limit
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: false
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+  - { reg: '$r2', virtual-reg: '' }
+  - { reg: '$r3', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       8
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+  
+    frame-setup tPUSH 14, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $r7 = frame-setup tMOVr $sp, 14, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_register $r7
+    tCMPi8 $r3, 0, 14, $noreg, implicit-def $cpsr
+    t2IT 0, 8, implicit-def $itstate
+    tPOP_RET 0, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+    renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
+    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
+    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14, $noreg
+    $lr = tMOVr $r3, 14, $noreg
+    t2DoLoopStart killed $r3
+  
+  bb.1.for.body:
+    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  
+    dead renamable $r3 = SPACE 4072, undef renamable $r0
+    renamable $r12, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep11, !tbaa !3)
+    renamable $r3, renamable $r2 = t2LDR_PRE killed renamable $r2, 4, 14, $noreg :: (load 4 from %ir.scevgep7, !tbaa !3)
+    renamable $r3 = nsw t2MUL killed renamable $r3, killed renamable $r12, 14, $noreg
+    early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep3, !tbaa !3)
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2LoopEnd renamable $lr, %bb.1
+    tB %bb.2, 14, $noreg
+  
+  bb.2.for.cond.cleanup:
+    tPOP_RET 14, $noreg, def $r7, def $pc
+
+...

Modified: llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll?rev=364288&r1=364287&r2=364288&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll (original)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/structure.ll Tue Jun 25 03:45:51 2019
@@ -1,4 +1,6 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops -disable-arm-loloops=false %s -S -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -disable-arm-loloops=false %s -o - | FileCheck %s --check-prefix=CHECK-LLC
+; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false | FileCheck %s --check-prefix=CHECK-UNROLL
 
 ; CHECK-LABEL: early_exit
 ; CHECK-NOT: llvm.set.loop.iterations
@@ -43,6 +45,16 @@ do.end:
 
 ; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
 ; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7
+
+; CHECK-LLC:      nested:
+; CHECK-LLC-NOT:    mov lr, r1
+; CHECK-LLC:        dls lr, r1
+; CHECK-LLC-NOT:    mov lr, r1
+; CHECK-LLC:      [[LOOP_HEADER:\.LBB[0-9._]+]]:
+; CHECK-LLC:        le lr, [[LOOP_HEADER]]
+; CHECK-LLC-NOT:    b [[LOOP_EXIT:\.LBB[0-9._]+]]
+; CHECK-LLC:      [[LOOP_EXIT:\.LBB[0-9._]+]]:
+
 define void @nested(i32* nocapture %A, i32 %N) {
 entry:
   %cmp20 = icmp eq i32 %N, 0
@@ -210,6 +222,171 @@ exit:
   ret void
 }
 
+; CHECK-LABEL: search
+; CHECK: for.body.preheader:
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: br label %for.body
+; CHECK: for.body:
+; CHECK: for.inc:
+; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32
+; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
+; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
+define i32 @search(i8* nocapture readonly %c, i32 %N) {
+entry:
+  %cmp11 = icmp eq i32 %N, 0
+  br i1 %cmp11, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
+  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
+  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
+  ret i32 %sub
+
+for.body:
+  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
+  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
+  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
+  %0 = load i8, i8* %arrayidx, align 1
+  switch i8 %0, label %for.inc [
+    i8 108, label %sw.bb
+    i8 111, label %sw.bb
+    i8 112, label %sw.bb
+    i8 32, label %sw.bb1
+  ]
+
+sw.bb:                                            ; preds = %for.body, %for.body, %for.body
+  %inc = add nsw i32 %found.012, 1
+  br label %for.inc
+
+sw.bb1:                                           ; preds = %for.body
+  %inc2 = add nsw i32 %spaces.013, 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
+  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
+  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
+  %inc3 = add nuw i32 %i.014, 1
+  %exitcond = icmp eq i32 %inc3, %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: unroll_inc_int
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
+
+; TODO: We should be able to support the unrolled loop body.
+; CHECK-UNROLL-LABEL: unroll_inc_int:
+; CHECK-UNROLL:     [[PREHEADER:\.LBB[0-9_]+]]: @ %for.body.preheader
+; CHECK-UNROLL-NOT: dls
+; CHECK-UNROLL:     [[LOOP:\.LBB[0-9_]+]]: @ %for.body
+; CHECK-UNROLL-NOT: le lr, [[LOOP]]
+; CHECK-UNROLL:     bne [[LOOP]]
+; CHECK-UNROLL:     %for.body.epil.preheader
+; CHECK-UNROLL:     dls
+; CHECK-UNROLL:     %for.body.epil
+; CHECK-UNROLL:     le
+
+define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+entry:
+  %cmp8 = icmp sgt i32 %N, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nuw nsw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: unroll_inc_unsigned
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
+
+; CHECK-LLC-LABEL: unroll_inc_unsigned:
+; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]]
+; CHECK-LLC: le  lr
+
+; TODO: We should be able to support the unrolled loop body.
+; CHECK-UNROLL-LABEL: unroll_inc_unsigned:
+; CHECK-UNROLL:     [[PREHEADER:\.LBB[0-9_]+]]: @ %for.body.preheader
+; CHECK-UNROLL-NOT: dls
+; CHECK-UNROLL:     [[LOOP:\.LBB[0-9_]+]]: @ %for.body
+; CHECK-UNROLL-NOT: le lr, [[LOOP]]
+; CHECK-UNROLL:     bne [[LOOP]]
+; CHECK-UNROLL:     %for.body.epil.preheader
+; CHECK-UNROLL:     dls
+; CHECK-UNROLL:     %for.body.epil
+; CHECK-UNROLL:     le
+define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+entry:
+  %cmp8 = icmp eq i32 %N, 0
+  br i1 %cmp8, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+  store i32 %mul, i32* %arrayidx2, align 4
+  %inc = add nuw i32 %i.09, 1
+  %exitcond = icmp eq i32 %inc, %N
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: unroll_dec_int
+; CHECK: call void @llvm.set.loop.iterations.i32(i32 %N)
+; CHECK: call i32 @llvm.loop.decrement.reg.i32.i32.i32(
+
+; TODO: An unnecessary register is being held to hold COUNT, lr should just
+; be used instead.
+; CHECK-LLC-LABEL: unroll_dec_int:
+; CHECK-LLC: dls lr, [[COUNT:r[0-9]+]]
+; CHECK-LLC: subs  [[COUNT]], #1
+; CHECK-LLC: le  lr
+
+; CHECK-UNROLL-LABEL: unroll_dec_int:
+; CHECK-UNROLL: dls lr
+; CHECK-UNROLL: le lr
+; CHECK-UNROLL: dls lr
+; CHECK-UNROLL: le lr
+define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
+entry:
+  %cmp8 = icmp sgt i32 %N, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
+  %1 = load i32, i32* %arrayidx1, align 4
+  %mul = mul nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
+  store i32 %mul, i32* %arrayidx2, align 4
+  %dec = add nsw i32 %i.09, -1
+  %cmp = icmp sgt i32 %dec, 0
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
 
 declare void @llvm.set.loop.iterations.i32(i32) #0
 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0

Added: llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir?rev=364288&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir (added)
+++ llvm/trunk/test/Transforms/HardwareLoops/ARM/switch.mir Tue Jun 25 03:45:51 2019
@@ -0,0 +1,198 @@
+# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops -o - | FileCheck %s
+# CHECK:      bb.1.for.body.preheader:
+# CHECK:        $lr = t2DLS
+# CHECK-NOT:    t2LoopDec
+# CHECK:      bb.6.for.inc:
+# CHECK:        $lr = t2LEUpdate renamable $lr, %bb.2
+
+--- |
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv8.1m.main-unknown-unknown"
+  
+  ; Function Attrs: norecurse nounwind readonly
+  define dso_local arm_aapcscc i32 @search(i8* nocapture readonly %c, i32 %N) local_unnamed_addr #0 {
+  entry:
+    %cmp11 = icmp eq i32 %N, 0
+    br i1 %cmp11, label %for.cond.cleanup, label %for.body.preheader
+  
+  for.body.preheader:
+    call void @llvm.set.loop.iterations.i32(i32 %N)
+    br label %for.body
+  
+  for.cond.cleanup:
+    %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
+    %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
+    %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
+    ret i32 %sub
+  
+  for.body:
+    %lsr.iv1 = phi i8* [ %c, %for.body.preheader ], [ %scevgep, %for.inc ]
+    %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %for.body.preheader ]
+    %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %for.body.preheader ]
+    %0 = phi i32 [ %N, %for.body.preheader ], [ %3, %for.inc ]
+    %1 = load i8, i8* %lsr.iv1, align 1
+    %2 = zext i8 %1 to i32
+    switch i32 %2, label %for.inc [
+      i32 108, label %sw.bb
+      i32 111, label %sw.bb
+      i32 112, label %sw.bb
+      i32 32, label %sw.bb1
+    ]
+  
+  sw.bb:
+    %inc = add nsw i32 %found.012, 1
+    br label %for.inc
+  
+  sw.bb1:
+    %inc2 = add nsw i32 %spaces.013, 1
+    br label %for.inc
+  
+  for.inc:
+    %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
+    %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
+    %scevgep = getelementptr i8, i8* %lsr.iv1, i32 1
+    %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
+    %4 = icmp ne i32 %3, 0
+    br i1 %4, label %for.body, label %for.cond.cleanup
+  }
+  
+  declare void @llvm.set.loop.iterations.i32(i32) #1
+  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
+  declare void @llvm.stackprotector(i8*, i8**) #2
+  
+  attributes #0 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+ras,+soft-float,+strict-align,+thumb-mode,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8d16,-fp-armv8d16sp,-fp-armv8sp,-fp16,-fp16fml,-fp64,-fpregs,-fullfp16,-neon,-vfp2,-vfp2d16,-vfp2d16sp,-vfp2sp,-vfp3,-vfp3d16,-vfp3d16sp,-vfp3sp,-vfp4,-vfp4d16,-vfp4d16sp,-vfp4sp" "unsafe-fp-math"="false" "use-soft-float"="true" }
+  attributes #1 = { noduplicate nounwind }
+  attributes #2 = { nounwind }
+
+...
+---
+name:            search
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:       []
+liveins:         
+  - { reg: '$r0', virtual-reg: '' }
+  - { reg: '$r1', virtual-reg: '' }
+frameInfo:       
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       16
+  offsetAdjustment: -8
+  maxAlignment:    4
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, 
+      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x30000000), %bb.3(0x50000000)
+    liveins: $r0, $r1, $r4, $r6, $lr
+  
+    $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r6, $r7, killed $lr
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    frame-setup CFI_INSTRUCTION offset $r6, -12
+    frame-setup CFI_INSTRUCTION offset $r4, -16
+    $r7 = frame-setup t2ADDri $sp, 8, 14, $noreg, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+    t2CMPri $r1, 0, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 0, killed $cpsr
+  
+  bb.3.for.body.preheader:
+    successors: %bb.4(0x80000000)
+    liveins: $r0, $r1
+  
+    $lr = tMOVr $r1, 14, $noreg
+    t2DoLoopStart killed $r1
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    renamable $r12 = t2MOVi 1, 14, $noreg, $noreg
+    renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
+  
+  bb.4.for.body:
+    successors: %bb.5(0x26666665), %bb.6(0x5999999b)
+    liveins: $lr, $r0, $r1, $r2, $r12
+  
+    renamable $r3 = t2LDRBi12 renamable $r0, 0, 14, $noreg :: (load 1 from %ir.lsr.iv1)
+    renamable $r4 = t2SUBri renamable $r3, 108, 14, $noreg, $noreg
+    renamable $lr = t2LoopDec killed renamable $lr, 1
+    t2CMPri renamable $r4, 4, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 8, killed $cpsr
+  
+  bb.6.for.body:
+    successors: %bb.7(0x6db6db6e), %bb.5(0x12492492)
+    liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r12
+  
+    renamable $r4 = t2LSLrr renamable $r12, killed renamable $r4, 14, $noreg, $noreg
+    t2TSTri killed renamable $r4, 25, 14, $noreg, implicit-def $cpsr
+    t2Bcc %bb.5, 0, killed $cpsr
+  
+  bb.7.sw.bb:
+    successors: %bb.8(0x80000000)
+    liveins: $lr, $r0, $r1, $r2, $r12
+  
+    renamable $r2 = nsw t2ADDri killed renamable $r2, 1, 14, $noreg, $noreg
+    t2B %bb.8, 14, $noreg
+  
+  bb.5.for.body:
+    successors: %bb.8(0x80000000)
+    liveins: $lr, $r0, $r1, $r2, $r3, $r12
+  
+    t2CMPri killed renamable $r3, 32, 14, $noreg, implicit-def $cpsr
+    BUNDLE implicit-def dead $itstate, implicit-def $r1, implicit killed $r1, implicit killed $cpsr {
+      t2IT 0, 8, implicit-def $itstate
+      renamable $r1 = nsw t2ADDri killed renamable $r1, 1, 0, killed $cpsr, $noreg, implicit $r1, implicit internal killed $itstate
+    }
+  
+  bb.8.for.inc:
+    successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r0, $r1, $r2, $r12
+  
+    renamable $r0 = t2ADDri killed renamable $r0, 1, 14, $noreg, $noreg
+    t2LoopEnd renamable $lr, %bb.4
+    t2B %bb.2, 14, $noreg
+  
+  bb.2.for.cond.cleanup:
+    liveins: $r1, $r2
+  
+    renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
+  
+  bb.1:
+    renamable $r2 = t2MOVi 0, 14, $noreg, $noreg
+    renamable $r1 = t2MOVi 0, 14, $noreg, $noreg
+    renamable $r0 = nsw t2SUBrr killed renamable $r2, killed renamable $r1, 14, $noreg, $noreg
+    $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r6, def $r7, def $pc, implicit killed $r0
+
+...




More information about the llvm-commits mailing list