[llvm] r372111 - [ARM][LowOverheadLoops] Add LR def safety check

Galina Kistanova via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 18 11:23:04 PDT 2019


Hello Sam,

It looks like this commit added a broken test to the builder
llvm-clang-x86_64-expensive-checks-win.
http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/19705
:
. . .
Failing Tests (..):
    . . .
    LLVM :: CodeGen/Thumb2/LowOverheadLoops/massive.mir

The builder was already red and did not send notifications on this.
For now it's the only broken test on the builder.
Could you please have a look?

Thanks

Galina

On Tue, Sep 17, 2019 at 5:17 AM Sam Parker via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: sam_parker
> Date: Tue Sep 17 05:19:32 2019
> New Revision: 372111
>
> URL: http://llvm.org/viewvc/llvm-project?rev=372111&view=rev
> Log:
> [ARM][LowOverheadLoops] Add LR def safety check
>
> Converting the *LoopStart pseudo instructions into DLS/WLS results in
> LR being defined. These instructions were inserted on the assumption
> that LR would already contain the loop counter because a mov is
> introduced during ISel as the consumers in the loop can only use
> LR. That assumption proved wrong!
>
> So perform a safety check, finding an appropriate place to insert the
> DLS/WLS instructions or revert if this isn't possible.
>
> Differential Revision: https://reviews.llvm.org/D67539
>
> Added:
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir
> Removed:
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir
> Modified:
>     llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir
>
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir
>     llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir
>
> Modified: llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMLowOverheadLoops.cpp Tue Sep 17 05:19:32
> 2019
> @@ -34,6 +34,7 @@ using namespace llvm;
>  namespace {
>
>    class ARMLowOverheadLoops : public MachineFunctionPass {
> +    MachineFunction           *MF = nullptr;
>      const ARMBaseInstrInfo    *TII = nullptr;
>      MachineRegisterInfo       *MRI = nullptr;
>      std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
> @@ -51,9 +52,21 @@ namespace {
>
>      bool runOnMachineFunction(MachineFunction &MF) override;
>
> +    MachineFunctionProperties getRequiredProperties() const override {
> +      return MachineFunctionProperties().set(
> +          MachineFunctionProperties::Property::NoVRegs);
> +    }
> +
> +    StringRef getPassName() const override {
> +      return ARM_LOW_OVERHEAD_LOOPS_NAME;
> +    }
> +
> +  private:
>      bool ProcessLoop(MachineLoop *ML);
>
> -    bool RevertNonLoops(MachineFunction &MF);
> +    MachineInstr * IsSafeToDefineLR(MachineInstr *MI);
> +
> +    bool RevertNonLoops();
>
>      void RevertWhile(MachineInstr *MI) const;
>
> @@ -62,16 +75,9 @@ namespace {
>      void RevertLoopEnd(MachineInstr *MI) const;
>
>      void Expand(MachineLoop *ML, MachineInstr *Start,
> -                MachineInstr *Dec, MachineInstr *End, bool Revert);
> -
> -    MachineFunctionProperties getRequiredProperties() const override {
> -      return MachineFunctionProperties().set(
> -          MachineFunctionProperties::Property::NoVRegs);
> -    }
> +                MachineInstr *InsertPt, MachineInstr *Dec,
> +                MachineInstr *End, bool Revert);
>
> -    StringRef getPassName() const override {
> -      return ARM_LOW_OVERHEAD_LOOPS_NAME;
> -    }
>    };
>  }
>
> @@ -80,26 +86,28 @@ char ARMLowOverheadLoops::ID = 0;
>  INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE,
> ARM_LOW_OVERHEAD_LOOPS_NAME,
>                  false, false)
>
> -bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) {
> -  if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB())
> +bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
> +  const ARMSubtarget &ST = static_cast<const
> ARMSubtarget&>(mf.getSubtarget());
> +  if (!ST.hasLOB())
>      return false;
>
> -  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " -------------
> \n");
> +  MF = &mf;
> +  LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << "
> ------------- \n");
>
>    auto &MLI = getAnalysis<MachineLoopInfo>();
> -  MRI = &MF.getRegInfo();
> -  TII = static_cast<const ARMBaseInstrInfo*>(
> -    MF.getSubtarget().getInstrInfo());
> -  BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new
> ARMBasicBlockUtils(MF));
> +
> MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
> +  MRI = &MF->getRegInfo();
> +  TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
> +  BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new
> ARMBasicBlockUtils(*MF));
>    BBUtils->computeAllBlockSizes();
> -  BBUtils->adjustBBOffsetsAfter(&MF.front());
> +  BBUtils->adjustBBOffsetsAfter(&MF->front());
>
>    bool Changed = false;
>    for (auto ML : MLI) {
>      if (!ML->getParentLoop())
>        Changed |= ProcessLoop(ML);
>    }
> -  Changed |= RevertNonLoops(MF);
> +  Changed |= RevertNonLoops();
>    return Changed;
>  }
>
> @@ -108,6 +116,100 @@ static bool IsLoopStart(MachineInstr &MI
>           MI.getOpcode() == ARM::t2WhileLoopStart;
>  }
>
> +template<typename T>
> +static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned
> Reg) {
> +  for(auto &MI : make_range(T(Begin), End)) {
> +    for (auto &MO : MI.operands()) {
> +      if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
> +        continue;
> +      return &MI;
> +    }
> +  }
> +  return nullptr;
> +}
> +
> +static MachineInstr* SearchForUse(MachineInstr *Begin,
> +                                  MachineBasicBlock::iterator End,
> +                                  unsigned Reg) {
> +  for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) {
> +    for (auto &MO : MI.operands()) {
> +      if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
> +        continue;
> +      return &MI;
> +    }
> +  }
> +  return nullptr;
> +}
> +
> +// Is it safe to define LR with DLS/WLS?
> +// LR can defined if it is the operand to start, because it's the same
> value,
> +// or if it's going to be equivalent to the operand to Start.
> +MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) {
> +
> +  auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) {
> +    return MI->getOpcode() == ARM::tMOVr &&
> +           MI->getOperand(0).getReg() == ARM::LR &&
> +           MI->getOperand(1).getReg() == Reg &&
> +           MI->getOperand(2).getImm() == ARMCC::AL;
> +   };
> +
> +  MachineBasicBlock *MBB = Start->getParent();
> +  unsigned CountReg = Start->getOperand(0).getReg();
> +  // Walk forward and backward in the block to find the closest
> instructions
> +  // that define LR. Then also filter them out if they're not a mov lr.
> +  MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR);
> +  if (PredLRDef && !IsMoveLR(PredLRDef, CountReg))
> +    PredLRDef = nullptr;
> +
> +  MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR);
> +  if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg))
> +    SuccLRDef = nullptr;
> +
> +  // We've either found one, two or none mov lr instructions... Now
> figure out
> +  // if they are performing the equilvant mov that the Start instruction
> will.
> +  // Do this by scanning forward and backward to see if there's a def of
> the
> +  // register holding the count value. If we find a suitable def, return
> it as
> +  // the insert point. Later, if InsertPt != Start, then we can remove the
> +  // redundant instruction.
> +  if (SuccLRDef) {
> +    MachineBasicBlock::iterator End(SuccLRDef);
> +    if (!SearchForDef(Start, End, CountReg)) {
> +      return SuccLRDef;
> +    } else
> +      SuccLRDef = nullptr;
> +  }
> +  if (PredLRDef) {
> +    MachineBasicBlock::reverse_iterator End(PredLRDef);
> +    if (!SearchForDef(Start, End, CountReg)) {
> +      return PredLRDef;
> +    } else
> +      PredLRDef = nullptr;
> +  }
> +
> +  // We can define LR because LR already contains the same value.
> +  if (Start->getOperand(0).getReg() == ARM::LR)
> +    return Start;
> +
> +  // We've found no suitable LR def and Start doesn't use LR directly.
> Can we
> +  // just define LR anyway?
> +  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
> +  LivePhysRegs LiveRegs(*TRI);
> +  LiveRegs.addLiveOuts(*MBB);
> +
> +  // Not if we've haven't found a suitable mov and LR is live out.
> +  if (LiveRegs.contains(ARM::LR))
> +    return nullptr;
> +
> +  // If LR is not live out, we can insert the instruction if nothing else
> +  // uses LR after it.
> +  if (!SearchForUse(Start, MBB->end(), ARM::LR))
> +    return Start;
> +
> +  LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion
> point for"
> +             << " LR\n");
> +  return nullptr;
> +}
> +
>  bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
>
>    bool Changed = false;
> @@ -169,11 +271,13 @@ bool ARMLowOverheadLoops::ProcessLoop(Ma
>          End = &MI;
>        else if (IsLoopStart(MI))
>          Start = &MI;
> -      else if (MI.getDesc().isCall())
> +      else if (MI.getDesc().isCall()) {
>          // TODO: Though the call will require LE to execute again, does
> this
>          // mean we should revert? Always executing LE hopefully should be
>          // faster than performing a sub,cmp,br or even subs,br.
>          Revert = true;
> +        LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
> +      }
>
>        if (!Dec || End)
>          continue;
> @@ -237,7 +341,14 @@ bool ARMLowOverheadLoops::ProcessLoop(Ma
>      Revert = true;
>    }
>
> -  Expand(ML, Start, Dec, End, Revert);
> +  MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start);
> +  if (!InsertPt) {
> +    LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion
> point.\n");
> +    Revert = true;
> +  } else
> +    LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " <<
> *InsertPt);
> +
> +  Expand(ML, Start, InsertPt, Dec, End, Revert);
>    return true;
>  }
>
> @@ -304,33 +415,13 @@ void ARMLowOverheadLoops::RevertLoopEnd(
>  }
>
>  void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
> +                                 MachineInstr *InsertPt,
>                                   MachineInstr *Dec, MachineInstr *End,
>                                   bool Revert) {
>
> -  auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start) {
> -    // The trip count should already been held in LR since the
> instructions
> -    // within the loop can only read and write to LR. So, there should be
> a
> -    // mov to setup the count. WLS/DLS perform this move, so find the
> original
> -    // and delete it - inserting WLS/DLS in its place.
> -    MachineBasicBlock *MBB = Start->getParent();
> -    MachineInstr *InsertPt = Start;
> -    for (auto &I : MRI->def_instructions(ARM::LR)) {
> -      if (I.getParent() != MBB)
> -        continue;
> -
> -      // Always execute.
> -      if (!I.getOperand(2).isImm() || I.getOperand(2).getImm() !=
> ARMCC::AL)
> -        continue;
> -
> -      // Only handle move reg, if the trip count it will need moving into
> a reg
> -      // before the setup instruction anyway.
> -      if (!I.getDesc().isMoveReg() ||
> -          !I.getOperand(1).isIdenticalTo(Start->getOperand(0)))
> -        continue;
> -      InsertPt = &I;
> -      break;
> -    }
> -
> +  auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start,
> +                                MachineInstr *InsertPt) {
> +    MachineBasicBlock *MBB = InsertPt->getParent();
>      unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
>        ARM::t2DLS : ARM::t2WLS;
>      MachineInstrBuilder MIB =
> @@ -389,18 +480,18 @@ void ARMLowOverheadLoops::Expand(Machine
>      RevertLoopDec(Dec);
>      RevertLoopEnd(End);
>    } else {
> -    Start = ExpandLoopStart(ML, Start);
> +    Start = ExpandLoopStart(ML, Start, InsertPt);
>      RemoveDeadBranch(Start);
>      End = ExpandLoopEnd(ML, Dec, End);
>      RemoveDeadBranch(End);
>    }
>  }
>
> -bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) {
> +bool ARMLowOverheadLoops::RevertNonLoops() {
>    LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
>    bool Changed = false;
>
> -  for (auto &MBB : MF) {
> +  for (auto &MBB : *MF) {
>      SmallVector<MachineInstr*, 4> Starts;
>      SmallVector<MachineInstr*, 4> Decs;
>      SmallVector<MachineInstr*, 4> Ends;
>
> Modified: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/cond-mov.mir Tue Sep
> 17 05:19:32 2019
> @@ -4,6 +4,9 @@
>  # CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
>
>  --- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
>    define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly
> %q) {
>    entry:
>      %scevgep = getelementptr i32, i32* %q, i32 -1
> @@ -15,10 +18,10 @@
>      %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
>      %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
>      %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
> -    %scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1
> -    %scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1
> -    %1 = load i32, i32* %scevgep7, align 4
> -    store i32 %1, i32* %scevgep4, align 4
> +    %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %1 = load i32, i32* %scevgep6, align 4
> +    store i32 %1, i32* %scevgep2, align 4
>      %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
>      %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
>      %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
> @@ -44,7 +47,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -84,6 +87,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.1(0x80000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 8
> @@ -96,9 +100,10 @@ body:             |
>
>    bb.1.while.body:
>      successors: %bb.1(0x7c000000), %bb.2(0x04000000)
> +    liveins: $lr, $r0, $r1
>
> -    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14,
> $noreg :: (load 4 from %ir.scevgep7)
> -    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed
> renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4)
> +    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14,
> $noreg :: (load 4 from %ir.scevgep6)
> +    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed
> renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
>      renamable $lr = t2LoopDec killed renamable $lr, 1
>      t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
>      tB %bb.2, 14, $noreg
> @@ -108,4 +113,3 @@ body:             |
>      tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
>
>  ...
> -
>
> Modified:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> ---
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir
> (original)
> +++
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/end-positive-offset.mir Tue
> Sep 17 05:19:32 2019
> @@ -9,7 +9,10 @@
>  # CHECK: bb.2.for.cond.cleanup:
>  # CHECK: bb.3.for.header:
>
> ---- |
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
>    define void @size_limit(i32* nocapture %a, i32* nocapture readonly %b,
> i32* nocapture readonly %c, i32 %N) {
>    entry:
>      call void @llvm.set.loop.iterations.i32(i32 %N)
> @@ -45,9 +48,11 @@
>    }
>
>    ; Function Attrs: nounwind
> -  declare i32 @llvm.arm.space(i32 immarg, i32) #0
> +  declare i32 @llvm.arm.space(i32 immarg, i32) #0
> +
>    ; Function Attrs: noduplicate nounwind
> -  declare void @llvm.set.loop.iterations.i32(i32) #1
> +  declare void @llvm.set.loop.iterations.i32(i32) #1
> +
>    ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
>
> @@ -63,7 +68,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -128,6 +133,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.3(0x80000000)
> +    liveins: $r0, $r1, $r2, $r3, $r7, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 8
> @@ -184,5 +190,3 @@ body:             |
>      tB %bb.1, 14, $noreg
>
>  ...
> -
> -
>
> Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir?rev=372111&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir
> (added)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dls.mir Tue
> Sep 17 05:19:32 2019
> @@ -0,0 +1,115 @@
> +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s
> -verify-machineinstrs -o - | FileCheck %s
> +# CHECK: $lr = t2DLS $r0
> +# CHECK-NOT: $lr = tMOVr $r0
> +# CHECK: $lr = t2LEUpdate renamable $lr, %bb.1
> +
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
> +  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly
> %q) {
> +  entry:
> +    %scevgep = getelementptr i32, i32* %q, i32 -1
> +    %scevgep3 = getelementptr i32, i32* %p, i32 -1
> +    call void @llvm.set.loop.iterations.i32(i32 %n)
> +    br label %while.body
> +
> +  while.body:                                       ; preds =
> %while.body, %entry
> +    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ]
> +    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ]
> +    %0 = phi i32 [ %n, %entry ], [ %2, %while.body ]
> +    %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %1 = load i32, i32* %scevgep6, align 4
> +    store i32 %1, i32* %scevgep2, align 4
> +    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
> +    %3 = icmp ne i32 %2, 0
> +    br i1 %3, label %while.body, label %while.end
> +
> +  while.end:                                        ; preds = %while.body
> +    ret i32 0
> +  }
> +
> +  declare void @llvm.set.loop.iterations.i32(i32) #0
> +  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
> +
> +  attributes #0 = { noduplicate nounwind }
> +  attributes #1 = { nounwind }
> +
> +...
> +---
> +name:            do_copy
> +alignment:       2
> +exposesReturnsTwice: false
> +legalized:       false
> +regBankSelected: false
> +selected:        false
> +failedISel:      false
> +tracksRegLiveness: true
> +hasWinCFI:       false
> +registers:       []
> +liveins:
> +  - { reg: '$r0', virtual-reg: '' }
> +  - { reg: '$r1', virtual-reg: '' }
> +  - { reg: '$r2', virtual-reg: '' }
> +frameInfo:
> +  isFrameAddressTaken: false
> +  isReturnAddressTaken: false
> +  hasStackMap:     false
> +  hasPatchPoint:   false
> +  stackSize:       8
> +  offsetAdjustment: 0
> +  maxAlignment:    4
> +  adjustsStack:    false
> +  hasCalls:        false
> +  stackProtector:  ''
> +  maxCallFrameSize: 0
> +  cvBytesOfCalleeSavedRegisters: 0
> +  hasOpaqueSPAdjustment: false
> +  hasVAStart:      false
> +  hasMustTailInVarArgFunc: false
> +  localFrameSize:  0
> +  savePoint:       ''
> +  restorePoint:    ''
> +fixedStack:      []
> +stack:
> +  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$lr',
> callee-saved-restored: false,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +callSites:       []
> +constants:       []
> +machineFunctionInfo: {}
> +body:             |
> +  bb.0.entry:
> +    successors: %bb.1(0x80000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
> +
> +    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
> +    frame-setup CFI_INSTRUCTION def_cfa_offset 8
> +    frame-setup CFI_INSTRUCTION offset $lr, -4
> +    frame-setup CFI_INSTRUCTION offset $r7, -8
> +    t2DoLoopStart $r0
> +    $lr = tMOVr killed $r0, 14, $noreg
> +    renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
> +    renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
> +
> +  bb.1.while.body:
> +    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
> +    liveins: $lr, $r0, $r1
> +
> +    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14,
> $noreg :: (load 4 from %ir.scevgep6)
> +    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed
> renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
> +    renamable $lr = t2LoopDec killed renamable $lr, 1
> +    t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
> +    tB %bb.2, 14, $noreg
> +
> +  bb.2.while.end:
> +    $r0, dead $cpsr = tMOVi8 0, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
> +
> +...
>
> Modified:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir
> (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.mir
> Tue Sep 17 05:19:32 2019
> @@ -5,8 +5,6 @@
>  # CHECK-NOT: t2LEUpdate
>
>  --- |
> -  ; ModuleID =
> '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll'
> -  source_filename =
> "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-call.ll"
>    target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
>    target triple = "thumbv8.1m.main"
>
> @@ -35,15 +33,9 @@
>
>    declare i32 @bar(...) local_unnamed_addr #0
>
> -  ; Function Attrs: noduplicate nounwind
>    declare void @llvm.set.loop.iterations.i32(i32) #1
> -
> -  ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
>
> -  ; Function Attrs: nounwind
> -  declare void @llvm.stackprotector(i8*, i8**) #2
> -
>    attributes #0 = { "target-features"="+mve.fp" }
>    attributes #1 = { noduplicate nounwind }
>    attributes #2 = { nounwind }
> @@ -57,7 +49,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -101,6 +93,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.4(0x30000000), %bb.1(0x50000000)
> +    liveins: $r0, $r4, $r5, $r7, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7,
> killed $lr, implicit-def $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 16
> @@ -112,6 +105,7 @@ body:             |
>
>    bb.1.while.body.preheader:
>      successors: %bb.2(0x80000000)
> +    liveins: $r0
>
>      $lr = tMOVr $r0, 14, $noreg
>      renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
> @@ -119,6 +113,7 @@ body:             |
>
>    bb.2.while.body:
>      successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r4
>
>      $r5 = tMOVr killed $lr, 14, $noreg
>      tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp,
> implicit-def $sp, implicit-def $r0
> @@ -129,6 +124,8 @@ body:             |
>      tB %bb.3, 14, $noreg
>
>    bb.3.while.end:
> +    liveins: $r4
> +
>      $r0 = tMOVr killed $r4, 14, $noreg
>      tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
>
> @@ -138,4 +135,3 @@ body:             |
>      tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
>
>  ...
> -
>
> Modified:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir
> (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-read.mir
> Tue Sep 17 05:19:32 2019
> @@ -4,7 +4,10 @@
>  # CHECK-NOT: t2DLS
>  # CHECK-NOT: t2LEUpdate
>
> ---- |
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
>    define i32 @mov_between_dec_end(i32 %n) #0 {
>    entry:
>      %cmp6 = icmp eq i32 %n, 0
> @@ -15,7 +18,6 @@
>      br label %while.body
>
>    while.body:                                       ; preds =
> %while.body, %while.body.preheader
> -    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
>      %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
>      %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
>      %add = add i32 %1, 0
> @@ -27,10 +29,7 @@
>      ret i32 %res.0.lcssa
>    }
>
> -  ; Function Attrs: noduplicate nounwind
>    declare void @llvm.set.loop.iterations.i32(i32) #1
> -
> -  ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
>
>    attributes #0 = { "target-features"="+mve.fp" }
> @@ -46,7 +45,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -56,11 +55,11 @@ frameInfo:
>    isReturnAddressTaken: false
>    hasStackMap:     false
>    hasPatchPoint:   false
> -  stackSize:       16
> +  stackSize:       8
>    offsetAdjustment: 0
>    maxAlignment:    4
> -  adjustsStack:    true
> -  hasCalls:        true
> +  adjustsStack:    false
> +  hasCalls:        false
>    stackProtector:  ''
>    maxCallFrameSize: 0
>    cvBytesOfCalleeSavedRegisters: 0
> @@ -78,51 +77,46 @@ stack:
>    - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
>        stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
>        debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r5',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r4',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
>  callSites:       []
>  constants:       []
>  machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.4(0x30000000), %bb.1(0x50000000)
> +    liveins: $r0, $r7, $lr
>
> -    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7,
> killed $lr, implicit-def $sp, implicit $sp
> -    frame-setup CFI_INSTRUCTION def_cfa_offset 16
> +    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
> +    frame-setup CFI_INSTRUCTION def_cfa_offset 8
>      frame-setup CFI_INSTRUCTION offset $lr, -4
>      frame-setup CFI_INSTRUCTION offset $r7, -8
> -    frame-setup CFI_INSTRUCTION offset $r5, -12
> -    frame-setup CFI_INSTRUCTION offset $r4, -16
>      tCBZ $r0, %bb.4
>
>    bb.1.while.body.preheader:
>      successors: %bb.2(0x80000000)
> +    liveins: $r0
>
>      $lr = tMOVr $r0, 14, $noreg
> -    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
>      t2DoLoopStart killed $r0
>
>    bb.2.while.body:
>      successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r4
>
> -    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable
> $r0, 14, $noreg
> +    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable
> $r4, 14, $noreg
>      renamable $lr = t2LoopDec killed renamable $lr, 1
>      renamable $r4 = tMOVr $lr, 14, $noreg
>      t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
>      tB %bb.3, 14, $noreg
>
>    bb.3.while.end:
> -    $r0 = tMOVr killed $r4, 14, $noreg
> -    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
> +    liveins: $lr
> +
> +    $r0 = tMOVr killed $lr, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
>
>    bb.4:
> -    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
> -    $r0 = tMOVr killed $r4, 14, $noreg
> -    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
> +    renamable $lr = t2MOVi 0, 14, $noreg, $noreg
> +    $r0 = tMOVr killed $lr, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
>
>  ...
> -
>
> Removed:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir?rev=372110&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir
> (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-spill.mir
> (removed)
> @@ -1,136 +0,0 @@
> -# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops
> --verify-machineinstrs -o - | FileCheck %s
> -
> -# CHECK: while.body:
> -# CHECK-NOT: t2DLS
> -# CHECK-NOT: t2LEUpdate
> -
> ---- |
> -  define i32 @skip_spill(i32 %n) #0 {
> -  entry:
> -    %cmp6 = icmp eq i32 %n, 0
> -    br i1 %cmp6, label %while.end, label %while.body.preheader
> -
> -  while.body.preheader:                             ; preds = %entry
> -    call void @llvm.set.loop.iterations.i32(i32 %n)
> -    br label %while.body
> -
> -  while.body:                                       ; preds =
> %while.body, %while.body.preheader
> -    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
> -    %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
> -    %call = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
> -    %add = add nsw i32 %call, %res.07
> -    %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
> -    %2 = icmp ne i32 %1, 0
> -    br i1 %2, label %while.body, label %while.end
> -
> -  while.end:                                        ; preds =
> %while.body, %entry
> -    %res.0.lcssa = phi i32 [ 0, %entry ], [ %add, %while.body ]
> -    ret i32 %res.0.lcssa
> -  }
> -
> -  declare i32 @bar(...) local_unnamed_addr #0
> -
> -  ; Function Attrs: noduplicate nounwind
> -  declare void @llvm.set.loop.iterations.i32(i32) #1
> -
> -  ; Function Attrs: noduplicate nounwind
> -  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
> -
> -  ; Function Attrs: nounwind
> -  declare void @llvm.stackprotector(i8*, i8**) #2
> -
> -  attributes #0 = { "target-features"="+mve.fp" }
> -  attributes #1 = { noduplicate nounwind }
> -  attributes #2 = { nounwind }
> -
> -...
> ----
> -name:            skip_spill
> -alignment:       2
> -exposesReturnsTwice: false
> -legalized:       false
> -regBankSelected: false
> -selected:        false
> -failedISel:      false
> -tracksRegLiveness: false
> -hasWinCFI:       false
> -registers:       []
> -liveins:
> -  - { reg: '$r0', virtual-reg: '' }
> -frameInfo:
> -  isFrameAddressTaken: false
> -  isReturnAddressTaken: false
> -  hasStackMap:     false
> -  hasPatchPoint:   false
> -  stackSize:       16
> -  offsetAdjustment: 0
> -  maxAlignment:    4
> -  adjustsStack:    true
> -  hasCalls:        true
> -  stackProtector:  ''
> -  maxCallFrameSize: 0
> -  cvBytesOfCalleeSavedRegisters: 0
> -  hasOpaqueSPAdjustment: false
> -  hasVAStart:      false
> -  hasMustTailInVarArgFunc: false
> -  localFrameSize:  0
> -  savePoint:       ''
> -  restorePoint:    ''
> -fixedStack:      []
> -stack:
> -  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$lr',
> callee-saved-restored: false,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r5',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r4',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -callSites:       []
> -constants:       []
> -machineFunctionInfo: {}
> -body:             |
> -  bb.0.entry:
> -    successors: %bb.4(0x30000000), %bb.1(0x50000000)
> -
> -    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7,
> killed $lr, implicit-def $sp, implicit $sp
> -    frame-setup CFI_INSTRUCTION def_cfa_offset 16
> -    frame-setup CFI_INSTRUCTION offset $lr, -4
> -    frame-setup CFI_INSTRUCTION offset $r7, -8
> -    frame-setup CFI_INSTRUCTION offset $r5, -12
> -    frame-setup CFI_INSTRUCTION offset $r4, -16
> -    tCBZ $r0, %bb.4
> -
> -  bb.1.while.body.preheader:
> -    successors: %bb.2(0x80000000)
> -
> -    $lr = tMOVr $r0, 14, $noreg
> -    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
> -    t2DoLoopStart killed $r0
> -
> -  bb.2.while.body:
> -    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> -
> -    $r5 = tMOVr killed $lr, 14, $noreg
> -    tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp,
> implicit-def $sp, implicit-def $r0
> -    $lr = tMOVr killed $r5, 14, $noreg
> -    renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable
> $r0, 14, $noreg
> -    renamable $lr = t2LoopDec killed renamable $lr, 1
> -    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
> -    tB %bb.3, 14, $noreg
> -
> -  bb.3.while.end:
> -    $r0 = tMOVr killed $r4, 14, $noreg
> -    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
> -
> -  bb.4:
> -    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
> -    $r0 = tMOVr killed $r4, 14, $noreg
> -    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
> -
> -...
> -
>
> Modified:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir
> (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-after-write.mir
> Tue Sep 17 05:19:32 2019
> @@ -4,7 +4,10 @@
>  # CHECK-NOT: t2DLS
>  # CHECK-NOT: t2LEUpdate
>
> ---- |
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
>    define i32 @mov_between_dec_end(i32 %n) #0 {
>    entry:
>      %cmp6 = icmp eq i32 %n, 0
> @@ -15,7 +18,6 @@
>      br label %while.body
>
>    while.body:                                       ; preds =
> %while.body, %while.body.preheader
> -    %res.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
>      %0 = phi i32 [ %n, %while.body.preheader ], [ %1, %while.body ]
>      %1 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
>      %add = add i32 %1, 2
> @@ -33,6 +35,9 @@
>    ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1
>
> +  ; Function Attrs: nounwind
> +  declare void @llvm.stackprotector(i8*, i8**) #2
> +
>    attributes #0 = { "target-features"="+mve.fp" }
>    attributes #1 = { noduplicate nounwind }
>    attributes #2 = { nounwind }
> @@ -46,7 +51,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -56,11 +61,11 @@ frameInfo:
>    isReturnAddressTaken: false
>    hasStackMap:     false
>    hasPatchPoint:   false
> -  stackSize:       16
> +  stackSize:       8
>    offsetAdjustment: 0
>    maxAlignment:    4
> -  adjustsStack:    true
> -  hasCalls:        true
> +  adjustsStack:    false
> +  hasCalls:        false
>    stackProtector:  ''
>    maxCallFrameSize: 0
>    cvBytesOfCalleeSavedRegisters: 0
> @@ -78,51 +83,45 @@ stack:
>    - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
>        stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
>        debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r5',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> -  - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment:
> 4,
> -      stack-id: default, callee-saved-register: '$r4',
> callee-saved-restored: true,
> -      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
>  callSites:       []
>  constants:       []
>  machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.4(0x30000000), %bb.1(0x50000000)
> +    liveins: $r0, $r7, $lr
>
> -    frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7,
> killed $lr, implicit-def $sp, implicit $sp
> -    frame-setup CFI_INSTRUCTION def_cfa_offset 16
> +    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
> +    frame-setup CFI_INSTRUCTION def_cfa_offset 8
>      frame-setup CFI_INSTRUCTION offset $lr, -4
>      frame-setup CFI_INSTRUCTION offset $r7, -8
> -    frame-setup CFI_INSTRUCTION offset $r5, -12
> -    frame-setup CFI_INSTRUCTION offset $r4, -16
>      tCBZ $r0, %bb.4
>
>    bb.1.while.body.preheader:
>      successors: %bb.2(0x80000000)
> +    liveins: $r0
>
>      $lr = tMOVr $r0, 14, $noreg
> -    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
>      t2DoLoopStart killed $r0
>
>    bb.2.while.body:
>      successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr
>
>      $r4 = tMOVr $lr, 14, $noreg
>      renamable $lr = t2LoopDec killed renamable $lr, 1
> +    renamable $r0 = t2ADDri renamable $lr, 2, 14, $noreg, $noreg
>      $lr = tMOVr $r4, 14, $noreg
>      t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
>      tB %bb.3, 14, $noreg
>
>    bb.3.while.end:
> -    $r0 = tMOVr killed $r4, 14, $noreg
> -    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
> +    liveins: $r0
> +
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
>
>    bb.4:
> -    renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg
> -    $r0 = tMOVr killed $r4, 14, $noreg
> -    tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit
> killed $r0
> +    renamable $r0, dead $cpsr = tMOVi8 0, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
>
>  ...
> -
>
> Modified:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
> (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir
> Tue Sep 17 05:19:32 2019
> @@ -14,6 +14,9 @@
>  # CHECK: bb.4.while.end:
>
>  --- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
>    define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b,
> i32 %N) {
>    entry:
>      %cmp = icmp ugt i32 %N, 2
> @@ -23,19 +26,19 @@
>      %test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
>      br i1 %test, label %while.body.preheader, label %while.end
>
> -  while.body.preheader:                             ; preds = %entry,
> %not.preheader
> +  while.body.preheader:                             ; preds =
> %not.preheader, %entry
>      %scevgep = getelementptr i16, i16* %a, i32 -1
>      %scevgep3 = getelementptr i16, i16* %b, i32 -1
>      br label %while.body
>
> -  while.body:                                       ; preds =
> %while.body.preheader, %while.body
> +  while.body:                                       ; preds =
> %while.body, %while.body.preheader
>      %lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [
> %scevgep5, %while.body ]
>      %lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1,
> %while.body ]
>      %count = phi i32 [ %count.next, %while.body ], [ %N,
> %while.body.preheader ]
> -    %scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1
> -    %scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1
> -    %load = load i16, i16* %scevgep6, align 2
> -    store i16 %load, i16* %scevgep2, align 2
> +    %scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1
> +    %scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1
> +    %load = load i16, i16* %scevgep4, align 2
> +    store i16 %load, i16* %scevgep7, align 2
>      %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32
> %count, i32 1)
>      %cmp1 = icmp ne i32 %count.next, 0
>      %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1
> @@ -46,13 +49,8 @@
>      ret void
>    }
>
> -  ; Function Attrs: noduplicate nounwind
>    declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
> -
> -  ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
> -
> -  ; Function Attrs: nounwind
>    declare void @llvm.stackprotector(i8*, i8**) #1
>
>    attributes #0 = { noduplicate nounwind }
> @@ -67,7 +65,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -107,6 +105,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.1(0x40000000), %bb.2(0x40000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 8
> @@ -118,21 +117,24 @@ body:             |
>
>    bb.1.not.preheader:
>      successors: %bb.2(0x40000000), %bb.4(0x40000000)
> +    liveins: $lr, $r0, $r1
>
>      t2WhileLoopStart renamable $lr, %bb.4, implicit-def dead $cpsr
>      tB %bb.2, 14, $noreg
>
>    bb.2.while.body.preheader:
>      successors: %bb.3(0x80000000)
> +    liveins: $lr, $r0, $r1
>
>      renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg
>      renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg
>
>    bb.3.while.body:
>      successors: %bb.3(0x7c000000), %bb.4(0x04000000)
> +    liveins: $lr, $r0, $r1
>
> -    renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2,
> 14, $noreg :: (load 2 from %ir.scevgep6)
> -    early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed
> renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2)
> +    renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2,
> 14, $noreg :: (load 2 from %ir.scevgep4)
> +    early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed
> renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7)
>      renamable $lr = t2LoopDec killed renamable $lr, 1
>      t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr
>      tB %bb.4, 14, $noreg
>
> Modified: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir
> (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/revert-while.mir Tue
> Sep 17 05:19:32 2019
> @@ -13,6 +13,9 @@
>  # CHECK-NEXT:   tB %bb.3, 14
>
>  --- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
>    define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32*
> nocapture readonly %b, i32 %N) #0 {
>    entry:
>      %0 = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
> @@ -23,15 +26,15 @@
>      %scevgep5 = getelementptr i32, i32* %b, i32 -1
>      br label %do.body
>
> -  do.body:                                          ; preds =
> %do.body.preheader, %do.body
> +  do.body:                                          ; preds = %do.body,
> %do.body.preheader
>      %lsr.iv6 = phi i32* [ %scevgep5, %do.body.preheader ], [ %scevgep7,
> %do.body ]
>      %lsr.iv = phi i32* [ %scevgep2, %do.body.preheader ], [ %scevgep3,
> %do.body ]
>      %1 = phi i32 [ %2, %do.body ], [ %N, %do.body.preheader ]
> -    %scevgep8 = getelementptr i32, i32* %lsr.iv6, i32 1
> -    %scevgep4 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep = getelementptr i32, i32* %lsr.iv6, i32 1
> +    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
>      %size = call i32 @llvm.arm.space(i32 4096, i32 undef)
> -    %tmp = load i32, i32* %scevgep8, align 4
> -    store i32 %tmp, i32* %scevgep4, align 4
> +    %tmp = load i32, i32* %scevgep, align 4
> +    store i32 %tmp, i32* %scevgep1, align 4
>      %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
>      %3 = icmp ne i32 %2, 0
>      %scevgep3 = getelementptr i32, i32* %lsr.iv, i32 1
> @@ -51,9 +54,6 @@
>    ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
>
> -  ; Function Attrs: nounwind
> -  declare void @llvm.stackprotector(i8*, i8**) #1
> -
>    attributes #0 = { "target-features"="+lob" }
>    attributes #1 = { nounwind }
>    attributes #2 = { noduplicate nounwind }
> @@ -67,7 +67,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -107,6 +107,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.1(0x40000000), %bb.3(0x40000000)
> +    liveins: $r1, $r2, $r3, $r7, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 8
> @@ -117,6 +118,7 @@ body:             |
>
>    bb.1.do.body.preheader:
>      successors: %bb.2(0x80000000)
> +    liveins: $r1, $r2, $r3
>
>      renamable $r0, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
>      renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14, $noreg
> @@ -124,10 +126,11 @@ body:             |
>
>    bb.2.do.body:
>      successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r0, $r1
>
>      dead renamable $r2 = SPACE 4096, undef renamable $r0
> -    renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14,
> $noreg :: (load 4 from %ir.scevgep8)
> -    early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed
> renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep4)
> +    renamable $r2, renamable $r0 = t2LDR_PRE killed renamable $r0, 4, 14,
> $noreg :: (load 4 from %ir.scevgep)
> +    early-clobber renamable $r1 = t2STR_PRE killed renamable $r2, killed
> renamable $r1, 4, 14, $noreg :: (store 4 into %ir.scevgep1)
>      renamable $lr = t2LoopDec killed renamable $lr, 1
>      t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
>      tB %bb.3, 14, $noreg
> @@ -136,4 +139,3 @@ body:             |
>      tPOP_RET 14, $noreg, def $r7, def $pc
>
>  ...
> -
>
> Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir?rev=372111&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
> (added)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/safe-def-no-mov.mir
> Tue Sep 17 05:19:32 2019
> @@ -0,0 +1,124 @@
> +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s
> -verify-machineinstrs -o - | FileCheck %s
> +# CHECK: $lr = t2DLS $r0
> +# CHECK: $lr = tMOVr $r0, 14
> +# CHECK: $lr = t2LEUpdate renamable $lr, %bb.2
> +
> +# TODO: Explore the preheader to remove the redundant tMOVr
> +
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
> +  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly
> %q) {
> +  entry:
> +    %scevgep = getelementptr i32, i32* %q, i32 -1
> +    %scevgep3 = getelementptr i32, i32* %p, i32 -1
> +    call void @llvm.set.loop.iterations.i32(i32 %n)
> +    br label %preheader
> +
> +  preheader:
> +    br label %while.body
> +
> +  while.body:                                       ; preds =
> %while.body, %entry
> +    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3,
> %preheader ]
> +    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader
> ]
> +    %0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
> +    %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %1 = load i32, i32* %scevgep6, align 4
> +    store i32 %1, i32* %scevgep2, align 4
> +    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
> +    %3 = icmp ne i32 %2, 0
> +    br i1 %3, label %while.body, label %while.end
> +
> +  while.end:                                        ; preds = %while.body
> +    ret i32 0
> +  }
> +
> +  declare void @llvm.set.loop.iterations.i32(i32) #0
> +  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
> +
> +  attributes #0 = { noduplicate nounwind }
> +  attributes #1 = { nounwind }
> +
> +...
> +---
> +name:            do_copy
> +alignment:       2
> +exposesReturnsTwice: false
> +legalized:       false
> +regBankSelected: false
> +selected:        false
> +failedISel:      false
> +tracksRegLiveness: true
> +hasWinCFI:       false
> +registers:       []
> +liveins:
> +  - { reg: '$r0', virtual-reg: '' }
> +  - { reg: '$r1', virtual-reg: '' }
> +  - { reg: '$r2', virtual-reg: '' }
> +frameInfo:
> +  isFrameAddressTaken: false
> +  isReturnAddressTaken: false
> +  hasStackMap:     false
> +  hasPatchPoint:   false
> +  stackSize:       8
> +  offsetAdjustment: 0
> +  maxAlignment:    4
> +  adjustsStack:    false
> +  hasCalls:        false
> +  stackProtector:  ''
> +  maxCallFrameSize: 0
> +  cvBytesOfCalleeSavedRegisters: 0
> +  hasOpaqueSPAdjustment: false
> +  hasVAStart:      false
> +  hasMustTailInVarArgFunc: false
> +  localFrameSize:  0
> +  savePoint:       ''
> +  restorePoint:    ''
> +fixedStack:      []
> +stack:
> +  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$lr',
> callee-saved-restored: false,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +callSites:       []
> +constants:       []
> +machineFunctionInfo: {}
> +body:             |
> +  bb.0.entry:
> +    successors: %bb.1(0x80000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
> +
> +    frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
> +    frame-setup CFI_INSTRUCTION def_cfa_offset 8
> +    frame-setup CFI_INSTRUCTION offset $lr, -4
> +    frame-setup CFI_INSTRUCTION offset $r7, -8
> +    t2DoLoopStart $r0
> +    renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
> +    renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
> +
> +  bb.1.preheader:
> +    successors: %bb.2(0x80000000)
> +    liveins: $r0
> +    $lr = tMOVr $r0, 14, $noreg
> +
> +  bb.2.while.body:
> +    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r0, $r1
> +
> +    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14,
> $noreg :: (load 4 from %ir.scevgep6)
> +    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed
> renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
> +    renamable $lr = t2LoopDec killed renamable $lr, 1
> +    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
> +    tB %bb.3, 14, $noreg
> +
> +  bb.3.while.end:
> +    $r0, dead $cpsr = tMOVi8 0, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
> +
> +...
>
> Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir?rev=372111&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir
> (added)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-liveout.mir Tue
> Sep 17 05:19:32 2019
> @@ -0,0 +1,122 @@
> +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s
> -verify-machineinstrs -o - | FileCheck %s
> +# CHECK-NOT: $lr = t2DLS
> +# CHECK: $lr = tMOVr $r0, 14
> +# CHECK-NOT: $lr = t2LEUpdate
> +
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
> +  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly
> %q) {
> +  entry:
> +    %scevgep = getelementptr i32, i32* %q, i32 -1
> +    %scevgep3 = getelementptr i32, i32* %p, i32 -1
> +    call void @llvm.set.loop.iterations.i32(i32 %n)
> +    br label %preheader
> +
> +  preheader:
> +    br label %while.body
> +
> +  while.body:                                       ; preds =
> %while.body, %entry
> +    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3,
> %preheader ]
> +    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader
> ]
> +    %0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
> +    %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %1 = load i32, i32* %scevgep6, align 4
> +    store i32 %1, i32* %scevgep2, align 4
> +    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
> +    %3 = icmp ne i32 %2, 0
> +    br i1 %3, label %while.body, label %while.end
> +
> +  while.end:                                        ; preds = %while.body
> +    ret i32 0
> +  }
> +
> +  declare void @llvm.set.loop.iterations.i32(i32) #0
> +  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
> +
> +  attributes #0 = { noduplicate nounwind }
> +  attributes #1 = { nounwind }
> +
> +...
> +---
> +name:            do_copy
> +alignment:       2
> +exposesReturnsTwice: false
> +legalized:       false
> +regBankSelected: false
> +selected:        false
> +failedISel:      false
> +tracksRegLiveness: true
> +hasWinCFI:       false
> +registers:       []
> +liveins:
> +  - { reg: '$r0', virtual-reg: '' }
> +  - { reg: '$r1', virtual-reg: '' }
> +  - { reg: '$r2', virtual-reg: '' }
> +frameInfo:
> +  isFrameAddressTaken: false
> +  isReturnAddressTaken: false
> +  hasStackMap:     false
> +  hasPatchPoint:   false
> +  stackSize:       8
> +  offsetAdjustment: 0
> +  maxAlignment:    4
> +  adjustsStack:    false
> +  hasCalls:        false
> +  stackProtector:  ''
> +  maxCallFrameSize: 0
> +  cvBytesOfCalleeSavedRegisters: 0
> +  hasOpaqueSPAdjustment: false
> +  hasVAStart:      false
> +  hasMustTailInVarArgFunc: false
> +  localFrameSize:  0
> +  savePoint:       ''
> +  restorePoint:    ''
> +fixedStack:      []
> +stack:
> +  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$lr',
> callee-saved-restored: false,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +callSites:       []
> +constants:       []
> +machineFunctionInfo: {}
> +body:             |
> +  bb.0.entry:
> +    successors: %bb.1(0x80000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
> +
> +    frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit
> $sp
> +    frame-setup CFI_INSTRUCTION def_cfa_offset 8
> +    frame-setup CFI_INSTRUCTION offset $lr, -4
> +    frame-setup CFI_INSTRUCTION offset $r7, -8
> +    t2DoLoopStart $r0
> +    renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg
> +    renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
> +
> +  bb.1.preheader:
> +    successors: %bb.2(0x80000000)
> +    liveins: $r0, $lr
> +    $lr = tMOVr $r0, 14, $noreg
> +
> +  bb.2.while.body:
> +    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r0, $r1
> +
> +    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14,
> $noreg :: (load 4 from %ir.scevgep6)
> +    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed
> renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
> +    renamable $lr = t2LoopDec killed renamable $lr, 1
> +    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
> +    tB %bb.3, 14, $noreg
> +
> +  bb.3.while.end:
> +    $r0, dead $cpsr = tMOVi8 0, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
> +
> +...
>
> Added: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir?rev=372111&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir
> (added)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-use-after.mir
> Tue Sep 17 05:19:32 2019
> @@ -0,0 +1,122 @@
> +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s
> -verify-machineinstrs -o - | FileCheck %s
> +# CHECK-NOT: $lr = t2DLS
> +# CHECK: $lr = tMOVr $r0, 14
> +# CHECK-NOT: $lr = t2LEUpdate
> +
> +--- |
> +  target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
> +  target triple = "thumbv8.1m.main"
> +
> +  define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly
> %q) {
> +  entry:
> +    %scevgep = getelementptr i32, i32* %q, i32 -1
> +    %scevgep3 = getelementptr i32, i32* %p, i32 -1
> +    call void @llvm.set.loop.iterations.i32(i32 %n)
> +    br label %preheader
> +
> +  preheader:
> +    br label %while.body
> +
> +  while.body:                                       ; preds =
> %while.body, %entry
> +    %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3,
> %preheader ]
> +    %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %preheader
> ]
> +    %0 = phi i32 [ %n, %preheader ], [ %2, %while.body ]
> +    %scevgep6 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep2 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %1 = load i32, i32* %scevgep6, align 4
> +    store i32 %1, i32* %scevgep2, align 4
> +    %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1
> +    %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1
> +    %2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %0, i32 1)
> +    %3 = icmp ne i32 %2, 0
> +    br i1 %3, label %while.body, label %while.end
> +
> +  while.end:                                        ; preds = %while.body
> +    ret i32 0
> +  }
> +
> +  declare void @llvm.set.loop.iterations.i32(i32) #0
> +  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
> +
> +  attributes #0 = { noduplicate nounwind }
> +  attributes #1 = { nounwind }
> +
> +...
> +---
> +name:            do_copy
> +alignment:       2
> +exposesReturnsTwice: false
> +legalized:       false
> +regBankSelected: false
> +selected:        false
> +failedISel:      false
> +tracksRegLiveness: true
> +hasWinCFI:       false
> +registers:       []
> +liveins:
> +  - { reg: '$r0', virtual-reg: '' }
> +  - { reg: '$r1', virtual-reg: '' }
> +  - { reg: '$r2', virtual-reg: '' }
> +frameInfo:
> +  isFrameAddressTaken: false
> +  isReturnAddressTaken: false
> +  hasStackMap:     false
> +  hasPatchPoint:   false
> +  stackSize:       8
> +  offsetAdjustment: 0
> +  maxAlignment:    4
> +  adjustsStack:    false
> +  hasCalls:        false
> +  stackProtector:  ''
> +  maxCallFrameSize: 0
> +  cvBytesOfCalleeSavedRegisters: 0
> +  hasOpaqueSPAdjustment: false
> +  hasVAStart:      false
> +  hasMustTailInVarArgFunc: false
> +  localFrameSize:  0
> +  savePoint:       ''
> +  restorePoint:    ''
> +fixedStack:      []
> +stack:
> +  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$lr',
> callee-saved-restored: false,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment:
> 4,
> +      stack-id: default, callee-saved-register: '$r7',
> callee-saved-restored: true,
> +      debug-info-variable: '', debug-info-expression: '',
> debug-info-location: '' }
> +callSites:       []
> +constants:       []
> +machineFunctionInfo: {}
> +body:             |
> +  bb.0.entry:
> +    successors: %bb.1(0x80000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
> +
> +    frame-setup tPUSH 14, $noreg, killed $r7, implicit-def $sp, implicit
> $sp
> +    frame-setup CFI_INSTRUCTION def_cfa_offset 8
> +    frame-setup CFI_INSTRUCTION offset $lr, -4
> +    frame-setup CFI_INSTRUCTION offset $r7, -8
> +    t2DoLoopStart $r0
> +    renamable $r0 = t2SUBri killed renamable $lr, 4, 14, $noreg, def $cpsr
> +    renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg
> +
> +  bb.1.preheader:
> +    successors: %bb.2(0x80000000)
> +    liveins: $r0
> +    $lr = tMOVr $r0, 14, $noreg
> +
> +  bb.2.while.body:
> +    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r0, $r1
> +
> +    renamable $r2, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14,
> $noreg :: (load 4 from %ir.scevgep6)
> +    early-clobber renamable $r0 = t2STR_PRE killed renamable $r2, killed
> renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep2)
> +    renamable $lr = t2LoopDec killed renamable $lr, 1
> +    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
> +    tB %bb.3, 14, $noreg
> +
> +  bb.3.while.end:
> +    $r0, dead $cpsr = tMOVi8 0, 14, $noreg
> +    tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0
> +
> +...
>
> Modified:
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> ---
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir
> (original)
> +++
> llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.mir
> Tue Sep 17 05:19:32 2019
> @@ -3,8 +3,6 @@
>  # CHECK-NOT: WhileLoopStart
>
>  --- |
> -  ; ModuleID =
> '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-negative-offset.ll'
> -  source_filename = "while-size-limit.ll"
>    target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
>    target triple = "thumbv8.1m.main"
>
> @@ -47,8 +45,10 @@
>
>    ; Function Attrs: nounwind
>    declare i32 @llvm.arm.space(i32 immarg, i32) #1
> +
>    ; Function Attrs: noduplicate nounwind
>    declare i1 @llvm.test.set.loop.iterations.i32(i32) #2
> +
>    ; Function Attrs: noduplicate nounwind
>    declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2
>
> @@ -65,7 +65,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -130,6 +130,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.4(0x80000000)
> +    liveins: $r0, $r1, $r2, $r3, $r4, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r4, killed $lr, implicit-def
> $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 8
> @@ -192,5 +193,3 @@ body:             |
>      tB %bb.2, 14, $noreg
>
>  ...
> -
> -
>
> Modified: llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir?rev=372111&r1=372110&r2=372111&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/LowOverheadLoops/while.mir Tue Sep 17
> 05:19:32 2019
> @@ -10,8 +10,6 @@
>  # CHECK:   $lr = t2LEUpdate renamable $lr
>
>  --- |
> -  ; ModuleID =
> '/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll'
> -  source_filename =
> "/home/sampar01/src/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while.ll"
>    target datalayout =
> "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
>    target triple = "thumbv8.1m.main"
>
> @@ -25,14 +23,14 @@
>      %scevgep3 = getelementptr i16, i16* %b, i32 -1
>      br label %while.body
>
> -  while.body:                                       ; preds =
> %while.body.preheader, %while.body
> +  while.body:                                       ; preds =
> %while.body, %while.body.preheader
>      %lsr.iv4 = phi i16* [ %scevgep3, %while.body.preheader ], [
> %scevgep5, %while.body ]
>      %lsr.iv = phi i16* [ %scevgep, %while.body.preheader ], [ %scevgep1,
> %while.body ]
>      %1 = phi i32 [ %3, %while.body ], [ %N, %while.body.preheader ]
> -    %scevgep2 = getelementptr i16, i16* %lsr.iv, i32 1
> -    %scevgep6 = getelementptr i16, i16* %lsr.iv4, i32 1
> -    %2 = load i16, i16* %scevgep6, align 2, !tbaa !2
> -    store i16 %2, i16* %scevgep2, align 2, !tbaa !2
> +    %scevgep7 = getelementptr i16, i16* %lsr.iv, i32 1
> +    %scevgep4 = getelementptr i16, i16* %lsr.iv4, i32 1
> +    %2 = load i16, i16* %scevgep4, align 2
> +    store i16 %2, i16* %scevgep7, align 2
>      %3 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %1, i32 1)
>      %4 = icmp ne i32 %3, 0
>      %scevgep1 = getelementptr i16, i16* %lsr.iv, i32 1
> @@ -48,15 +46,6 @@
>
>    attributes #0 = { noduplicate nounwind }
>    attributes #1 = { nounwind }
> -
> -  !llvm.module.flags = !{!0, !1}
> -
> -  !0 = !{i32 1, !"wchar_size", i32 4}
> -  !1 = !{i32 1, !"min_enum_size", i32 4}
> -  !2 = !{!3, !3, i64 0}
> -  !3 = !{!"short", !4, i64 0}
> -  !4 = !{!"omnipotent char", !5, i64 0}
> -  !5 = !{!"Simple C/C++ TBAA"}
>
>  ...
>  ---
> @@ -67,7 +56,7 @@ legalized:       false
>  regBankSelected: false
>  selected:        false
>  failedISel:      false
> -tracksRegLiveness: false
> +tracksRegLiveness: true
>  hasWinCFI:       false
>  registers:       []
>  liveins:
> @@ -107,6 +96,7 @@ machineFunctionInfo: {}
>  body:             |
>    bb.0.entry:
>      successors: %bb.1(0x40000000), %bb.3(0x40000000)
> +    liveins: $r0, $r1, $r2, $r7, $lr
>
>      frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def
> $sp, implicit $sp
>      frame-setup CFI_INSTRUCTION def_cfa_offset 8
> @@ -117,6 +107,7 @@ body:             |
>
>    bb.1.while.body.preheader:
>      successors: %bb.2(0x80000000)
> +    liveins: $r0, $r1, $r2
>
>      renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 2, 14, $noreg
>      renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 2, 14, $noreg
> @@ -124,9 +115,10 @@ body:             |
>
>    bb.2.while.body:
>      successors: %bb.2(0x7c000000), %bb.3(0x04000000)
> +    liveins: $lr, $r0, $r1
>
> -    renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2,
> 14, $noreg :: (load 2 from %ir.scevgep6, !tbaa !2)
> -    early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed
> renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep2, !tbaa !2)
> +    renamable $r2, renamable $r1 = t2LDRH_PRE killed renamable $r1, 2,
> 14, $noreg :: (load 2 from %ir.scevgep4)
> +    early-clobber renamable $r0 = t2STRH_PRE killed renamable $r2, killed
> renamable $r0, 2, 14, $noreg :: (store 2 into %ir.scevgep7)
>      renamable $lr = t2LoopDec killed renamable $lr, 1
>      t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
>      tB %bb.3, 14, $noreg
> @@ -135,4 +127,3 @@ body:             |
>      tPOP_RET 14, $noreg, def $r7, def $pc
>
>  ...
> -
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190918/4b959a94/attachment-0001.html>


More information about the llvm-commits mailing list