[llvm] 9ba5238 - [ARM] Simplification to ARMBlockPlacement Pass.

Malhar Jajoo via llvm-commits llvm-commits at lists.llvm.org
Wed May 5 17:20:37 PDT 2021


Author: Malhar Jajoo
Date: 2021-05-06T01:20:18+01:00
New Revision: 9ba5238c28daf930df3dcb9bc90b5c8531ae1466

URL: https://github.com/llvm/llvm-project/commit/9ba5238c28daf930df3dcb9bc90b5c8531ae1466
DIFF: https://github.com/llvm/llvm-project/commit/9ba5238c28daf930df3dcb9bc90b5c8531ae1466.diff

LOG: [ARM] Simplification to ARMBlockPlacement Pass.

It simplifies the logic by moving the predecessor (preHeader or its predecessor) above the target (or loopExit),
instead of moving the target to after the predecessor.

Since the loopExit is no longer being moved, directions of any branches within/to it are unaffected.

While the predecessor is being moved, the backwards movement simplifies some considerations,
and the only consideration now required is that a forward WLS to the predecessor should not become backwards.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D100094

Added: 
    

Modified: 
    llvm/lib/Target/ARM/ARMBlockPlacement.cpp
    llvm/test/CodeGen/Thumb2/block-placement.mir
    llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
    llvm/test/CodeGen/Thumb2/mve-float32regloops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index e71af5f36f195..539db713f17ca 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -36,7 +36,7 @@ class ARMBlockPlacement : public MachineFunctionPass {
   ARMBlockPlacement() : MachineFunctionPass(ID) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
-  void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
+  void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *Before);
   bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
   bool fixBackwardsWLS(MachineLoop *ML);
   bool processPostOrderLoops(MachineLoop *ML);
@@ -82,11 +82,11 @@ static MachineInstr *findWLS(MachineLoop *ML) {
 }
 
 /// Checks if loop has a backwards branching WLS, and if possible, fixes it.
-/// This requires checking the preheader (or it's predecessor) for a WLS and if
-/// its target is before it.
-/// If moving the target block wouldn't produce another backwards WLS or a new
-/// forwards LE branch, then move the target block after the preheader (or it's
-/// predecessor).
+/// This requires checking the predecessor (i.e. preheader or its predecessor)
+/// for a WLS and if its loopExit/target is before it.
+/// If moving the predecessor won't convert a WLS (to the predecessor) from
+/// a forward to a backward branching WLS, then move the predecessor block
+/// to before the loopExit/target.
 bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
   MachineInstr *WlsInstr = findWLS(ML);
   if (!WlsInstr)
@@ -94,7 +94,8 @@ bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
 
   MachineBasicBlock *Predecessor = WlsInstr->getParent();
   MachineBasicBlock *LoopExit = WlsInstr->getOperand(2).getMBB();
-  // We don't want to move the function's entry block.
+
+  // We don't want to move Preheader to before the function's entry block.
   if (!LoopExit->getPrevNode())
     return false;
   if (blockIsBefore(Predecessor, LoopExit))
@@ -103,77 +104,38 @@ bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
                     << Predecessor->getFullName() << " to "
                     << LoopExit->getFullName() << "\n");
 
-  // Make sure that moving the target block doesn't cause any of its WLSs
-  // that were previously not backwards to become backwards
-  bool CanMove = true;
-  MachineInstr *WlsInLoopExit = findWLSInBlock(LoopExit);
-  if (WlsInLoopExit) {
-    // An example loop structure where the LoopExit can't be moved, since
-    // bb1's WLS will become backwards once it's moved after bb3
-    // bb1:          - LoopExit
-    //      WLS bb2
-    // bb2:          - LoopExit2
-    //      ...
-    // bb3:          - Predecessor
-    //      WLS bb1
-    // bb4:          - Header
-    MachineBasicBlock *LoopExit2 = WlsInLoopExit->getOperand(2).getMBB();
-    // If the WLS from LoopExit to LoopExit2 is already backwards then
-    // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
-    // after the Predecessor then moving will keep it as a forward branch, so it
-    // can be moved. If LoopExit2 is between the Predecessor and LoopExit then
-    // moving LoopExit will make it a backwards branch, so it can't be moved
-    // since we'd fix one and introduce one backwards branch.
-    // TODO: Analyse the blocks to make a decision if it would be worth
-    // moving LoopExit even if LoopExit2 is between the Predecessor and
-    // LoopExit.
-    if (!blockIsBefore(LoopExit2, LoopExit) &&
-        (LoopExit2 == Predecessor || blockIsBefore(LoopExit2, Predecessor))) {
-      LLVM_DEBUG(dbgs() << DEBUG_PREFIX
-                        << "Can't move the target block as it would "
-                           "introduce a new backwards WLS branch\n");
-      CanMove = false;
-    }
-  }
-
-  if (CanMove) {
-    // Make sure no LEs become forwards.
-    // An example loop structure where the LoopExit can't be moved, since
-    // bb2's LE will become forwards once bb1 is moved after bb3.
-    // bb1:           - LoopExit
-    // bb2:
-    //      LE  bb1  - Terminator
-    // bb3:          - Predecessor
-    //      WLS bb1
-    // bb4:          - Header
-    for (auto It = LoopExit->getIterator(); It != Predecessor->getIterator();
-         It++) {
-      MachineBasicBlock *MBB = &*It;
-      for (auto &Terminator : MBB->terminators()) {
-        if (Terminator.getOpcode() != ARM::t2LoopEnd &&
-            Terminator.getOpcode() != ARM::t2LoopEndDec)
-          continue;
-        MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
-        // The LE will become forwards branching if it branches to LoopExit
-        // which isn't allowed by the architecture, so we should avoid
-        // introducing these.
-        // TODO: Analyse the blocks to make a decision if it would be worth
-        // moving LoopExit even if we'd introduce a forwards LE
-        if (LETarget == LoopExit) {
-          LLVM_DEBUG(dbgs() << DEBUG_PREFIX
-                            << "Can't move the target block as it would "
-                               "introduce a new forwards LE branch\n");
-          CanMove = false;
-          break;
-        }
+  // Make sure no forward branching WLSs to the Predecessor become backwards
+  // branching. An example loop structure where the Predecessor can't be moved,
+  // since bb2's WLS will become backwards once bb3 is moved before/above bb1.
+  //
+  // bb1:           - LoopExit
+  // bb2:
+  //      WLS  bb3
+  // bb3:          - Predecessor
+  //      WLS bb1
+  // bb4:          - Header
+  for (auto It = ++LoopExit->getIterator(); It != Predecessor->getIterator();
+       ++It) {
+    MachineBasicBlock *MBB = &*It;
+    for (auto &Terminator : MBB->terminators()) {
+      if (Terminator.getOpcode() != ARM::t2WhileLoopStartLR)
+        continue;
+      MachineBasicBlock *WLSTarget = Terminator.getOperand(2).getMBB();
+      // TODO: Analyse the blocks to make a decision if it would be worth
+      // moving Preheader even if we'd introduce a backwards WLS
+      if (WLSTarget == Predecessor) {
+        LLVM_DEBUG(
+            dbgs() << DEBUG_PREFIX
+                   << "Can't move Predecessor"
+                      "block as it would convert a WLS from forward to a "
+                      "backwards branching WLS\n");
+        return false;
       }
     }
   }
 
-  if (CanMove)
-    moveBasicBlock(LoopExit, Predecessor);
-
-  return CanMove;
+  moveBasicBlock(Predecessor, LoopExit);
+  return true;
 }
 
 /// Updates ordering (of WLS BB and their loopExits) in inner loops first
@@ -212,18 +174,20 @@ bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
   return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
 }
 
-/// Moves a given MBB to be positioned after another MBB while maintaining
-/// existing control flow
+// Moves a BasicBlock before another, without changing the control flow
 void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
-                                       MachineBasicBlock *After) {
-  LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
-                    << After->getName() << "\n");
+                                       MachineBasicBlock *Before) {
+  LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " before "
+                    << Before->getName() << "\n");
   MachineBasicBlock *BBPrevious = BB->getPrevNode();
   assert(BBPrevious && "Cannot move the function entry basic block");
-  MachineBasicBlock *AfterNext = After->getNextNode();
   MachineBasicBlock *BBNext = BB->getNextNode();
 
-  BB->moveAfter(After);
+  MachineBasicBlock *BeforePrev = Before->getPrevNode();
+  assert(BeforePrev &&
+         "Cannot move the given block to before the function entry block");
+  MachineFunction *F = BB->getParent();
+  BB->moveBefore(Before);
 
   // Since only the blocks are to be moved around (but the control flow must
   // not change), if there were any fall-throughs (to/from adjacent blocks),
@@ -251,12 +215,14 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
   // Fix fall-through to the moved BB from the one that used to be before it.
   if (BBPrevious->isSuccessor(BB))
     FixFallthrough(BBPrevious, BB);
-  // Fix fall through from the destination BB to the one that used to follow.
-  if (AfterNext && After->isSuccessor(AfterNext))
-    FixFallthrough(After, AfterNext);
+  // Fix fall through to the destination BB from the one that was before it.
+  if (BeforePrev->isSuccessor(Before))
+    FixFallthrough(BeforePrev, Before);
   // Fix fall through from the moved BB to the one that used to follow.
   if (BBNext && BB->isSuccessor(BBNext))
     FixFallthrough(BB, BBNext);
 
-  BBUtils->adjustBBOffsetsAfter(After);
+  F->RenumberBlocks();
+  BBUtils->computeAllBlockSizes();
+  BBUtils->adjustBBOffsetsAfter(&F->front());
 }

diff  --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir
index 7ea87c3859dd9..bf44e4e0265c4 100644
--- a/llvm/test/CodeGen/Thumb2/block-placement.mir
+++ b/llvm/test/CodeGen/Thumb2/block-placement.mir
@@ -1,51 +1,54 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s
 --- |
-  ; Checks that loopExitBlock gets moved (in forward direction) if there is a backwards WLS to it.
-  define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
+
+  ; Checks that Predecessor gets moved (to before the LoopExit) if it contains a backward WLS.
+  define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b)   {
   entry:
     unreachable
   }
 
-  ; Checks that loopExitBlock does not get reordered (since it is entry block) even if there is a backwards WLS to it.
-  define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
+  ; Checks that Predecessor (containing a backwards WLS) does not get moved to before the loopExit if it is the entry block.
+  define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b)   {
   entry:
     unreachable
   }
 
-  ; Checks that loopExitBlock (containing a backwards WLS) is moved (in forward direction) if there is a backwards WLS to it.
-  define void @backwards_branch_target_already_backwards(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
+  ; Checks that Predecessor (to which a forward WLS exists) is not moved if moving it would cause the WLS to become backwards branching.
+  define void @backwards_branch_backwards_wls(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c)   {
   entry:
     unreachable
   }
 
-  define void @backwards_branch_sibling(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
+  ; Checks that a MachineFunction is unaffected if it doesn't contain any WLS (pseudo) instruction.
+  define void @no_predecessor(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c)   {
   entry:
     unreachable
   }
 
-  ; Checks that loopExitBlock (to which a backwards LE exists) is not moved if moving it would cause the LE to become forwards branching.
-  define void @backwards_branch_forwards_le(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
+  ; Within a nested (Both the WLS and loopExit are at depth=3 here) loop, checks that Predecessor
+  ; gets moved (in backward direction) if there exists a backwards WLS from it to the LoopExit.
+  define void @nested_loops(i32 %n, i32 %m, i32 %l, i8* noalias %X, i8* noalias %Y)   {
   entry:
     unreachable
   }
 
-  ; Checks that a MachineFunction is unaffected if it doesn't contain any WLS (pseudo) instruction.
-  define void @no_preheader(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
+  ; Checks that Predecessor (to which a forward WLS exists) is moved if moving it would NOT cause the WLS
+  ; to become backwards branching.
+  define void @backwards_branch_forwards_wls(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) {
   entry:
     unreachable
   }
 
-  ; Within a nested loop, checks that loopExit gets moved (in forward direction) if there exists a backwards WLS to it.
-  ; Both the WLS and loopExit are at depth=3.
-  define void @nested_loops(i32 %n, i32 %m, i32 %l, i8* noalias %X, i8* noalias %Y) local_unnamed_addr #0 {
+  ; Checks that multiple predecessor case is handled appropriately
+  define void @multiple_predecessors(i32 %d, i32 %e, i32 %f) {
   entry:
     unreachable
   }
 
-  declare dso_local i32 @g(...) local_unnamed_addr #1
+  declare dso_local i32 @g(...)
 
-  declare dso_local i32 @h(...) local_unnamed_addr #1
+  declare dso_local i32 @h(...)
 
 ...
 ---
@@ -53,22 +56,22 @@ name:            backwards_branch
 body:             |
   ; CHECK-LABEL: name: backwards_branch
   ; CHECK: bb.0:
-  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   successors: %bb.1(0x80000000)
   ; CHECK:   tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   t2IT 11, 8, implicit-def $itstate
   ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-  ; CHECK: bb.2:
+  ; CHECK: bb.1:
   ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr
+  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.2, implicit-def dead $cpsr
   ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
-  ; CHECK: bb.1:
+  ; CHECK: bb.2:
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   ; CHECK: bb.3:
-  ; CHECK:   successors: %bb.3(0x7c000000), %bb.1(0x04000000)
+  ; CHECK:   successors: %bb.3(0x7c000000), %bb.2(0x04000000)
   ; CHECK:   renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg
   ; CHECK:   tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
+  ; CHECK:   t2B %bb.2, 14 /* CC::al */, $noreg
   bb.0:
     successors: %bb.2(0x80000000)
     liveins: $r0, $r1, $r2, $lr
@@ -144,199 +147,32 @@ body:             |
 
 ...
 ---
-name:            backwards_branch_target_already_backwards
-body:             |
-  ; CHECK-LABEL: name: backwards_branch_target_already_backwards
-  ; CHECK: bb.0:
-  ; CHECK:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
-  ; CHECK:   tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK:   t2Bcc %bb.1, 11 /* CC::lt */, killed $cpsr
-  ; CHECK:   t2B %bb.2, 14 /* CC::al */, $noreg
-  ; CHECK: bb.2:
-  ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   $lr = tMOVr $r0, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $lr, %bb.1, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
-  ; CHECK: bb.1:
-  ; CHECK:   successors: %bb.4(0x80000000)
-  ; CHECK:   tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK:   t2IT 11, 8, implicit-def $itstate
-  ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r1, %bb.0, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.4, 14 /* CC::al */, $noreg
-  ; CHECK: bb.3:
-  ; CHECK:   successors: %bb.3(0x7c000000), %bb.1(0x04000000)
-  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
-  ; CHECK: bb.4:
-  ; CHECK:   successors: %bb.5(0x80000000)
-  ; CHECK:   renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr
-  ; CHECK: bb.5:
-  ; CHECK:   successors: %bb.5(0x7c000000), %bb.6(0x04000000)
-  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
-  ; CHECK: bb.6:
-  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
-  bb.0:
-    successors: %bb.1(0x50000000), %bb.3(0x30000000)
-    liveins: $r0, $r1, $r2, $r3, $lr
-
-    tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr
-    t2B %bb.1, 14 /* CC::al */, $noreg
-
-  bb.3:
-    successors: %bb.4(0x80000000)
-    liveins: $r1, $r3
-
-    tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    t2IT 11, 8, implicit-def $itstate
-    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-    $lr = t2WhileLoopStartLR killed renamable $r1, %bb.0, implicit-def dead $cpsr
-    t2B %bb.4, 14 /* CC::al */, $noreg
-
-  bb.1:
-    successors: %bb.2(0x80000000)
-    liveins: $r0, $r1, $r2, $r3
-
-    $lr = tMOVr $r0, 14 /* CC::al */, $noreg
-    renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg
-    $lr = t2WhileLoopStartLR killed renamable $lr, %bb.3, implicit-def dead $cpsr
-
-  bb.2:
-    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-    liveins: $lr, $r0, $r1, $r3
-
-    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr
-    t2B %bb.3, 14 /* CC::al */, $noreg
-
-  bb.4:
-    successors: %bb.5(0x80000000)
-    liveins: $r1, $r3
-
-    renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
-    $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr
-
-  bb.5:
-    successors: %bb.5(0x7c000000), %bb.6(0x04000000)
-    liveins: $lr, $r0
-
-    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
-    t2B %bb.6, 14 /* CC::al */, $noreg
-
-  bb.6:
-    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
-
-...
----
-name:            backwards_branch_sibling
-body:             |
-  ; CHECK-LABEL: name: backwards_branch_sibling
-  ; CHECK: bb.0:
-  ; CHECK:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
-  ; CHECK:   tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK:   t2Bcc %bb.1, 11 /* CC::lt */, killed $cpsr
-  ; CHECK:   t2B %bb.2, 14 /* CC::al */, $noreg
-  ; CHECK: bb.1:
-  ; CHECK:   successors: %bb.4(0x80000000)
-  ; CHECK:   tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK:   t2IT 11, 8, implicit-def $itstate
-  ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r1, %bb.2, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.4, 14 /* CC::al */, $noreg
-  ; CHECK: bb.2:
-  ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   $lr = tMOVr $r0, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $lr, %bb.1, implicit-def dead $cpsr
-  ; CHECK: bb.3:
-  ; CHECK:   successors: %bb.3(0x7c000000), %bb.1(0x04000000)
-  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
-  ; CHECK: bb.4:
-  ; CHECK:   successors: %bb.5(0x80000000)
-  ; CHECK:   renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr
-  ; CHECK: bb.5:
-  ; CHECK:   successors: %bb.5(0x7c000000), %bb.6(0x04000000)
-  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
-  ; CHECK: bb.6:
-  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
-  bb.0:
-    successors: %bb.1(0x50000000), %bb.3(0x30000000)
-    liveins: $r0, $r1, $r2, $r3, $lr
-
-    tCMPi8 $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr
-    t2B %bb.1, 14 /* CC::al */, $noreg
-
-  bb.3:
-    successors: %bb.4(0x80000000)
-    liveins: $r1, $r3
-
-    tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    t2IT 11, 8, implicit-def $itstate
-    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
-    $lr = t2WhileLoopStartLR killed renamable $r1, %bb.1, implicit-def dead $cpsr
-    t2B %bb.4, 14 /* CC::al */, $noreg
-
-  bb.1:
-    successors: %bb.2(0x80000000)
-    liveins: $r0, $r1, $r2, $r3
-
-    $lr = tMOVr $r0, 14 /* CC::al */, $noreg
-    renamable $r0 = t2ADDrs killed renamable $r2, killed $r0, 18, 14 /* CC::al */, $noreg, $noreg
-    $lr = t2WhileLoopStartLR killed renamable $lr, %bb.3, implicit-def dead $cpsr
-
-  bb.2:
-    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-    liveins: $lr, $r0, $r1, $r3
-
-    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr
-    t2B %bb.3, 14 /* CC::al */, $noreg
-
-  bb.4:
-    successors: %bb.5(0x80000000)
-    liveins: $r1, $r3
-
-    renamable $r0 = t2ADDrs killed renamable $r3, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
-    $lr = t2WhileLoopStartLR killed renamable $r1, %bb.6, implicit-def dead $cpsr
-
-  bb.5:
-    successors: %bb.5(0x7c000000), %bb.6(0x04000000)
-    liveins: $lr, $r0
-
-    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
-    t2B %bb.6, 14 /* CC::al */, $noreg
-
-  bb.6:
-    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
-...
----
-name:            backwards_branch_forwards_le
+name:            backwards_branch_backwards_wls
 body:             |
-  ; CHECK-LABEL: name: backwards_branch_forwards_le
+  ; CHECK-LABEL: name: backwards_branch_backwards_wls
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.2(0x80000000)
   ; CHECK:   tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   t2IT 11, 8, implicit-def $itstate
   ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
   ; CHECK: bb.1:
-  ; CHECK:   successors: %bb.1(0x80000000)
-  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr
   ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
   ; CHECK: bb.2:
-  ; CHECK:   successors: %bb.3(0x80000000)
-  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
+  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.5, 14 /* CC::al */, $noreg
   ; CHECK: bb.3:
-  ; CHECK:   successors: %bb.3(0x7c000000), %bb.1(0x04000000)
-  ; CHECK:   renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg
-  ; CHECK:   tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg
-  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr
+  ; CHECK:   successors: %bb.1(0x7c000000), %bb.4(0x04000000)
+  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
   ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.5(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
   bb.0:
     successors: %bb.2(0x80000000)
     liveins: $r0, $r1, $r2, $lr
@@ -346,29 +182,40 @@ body:             |
     frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
 
   bb.1:
-    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.1, implicit-def dead $cpsr
     frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
 
   bb.2:
-    successors: %bb.3(0x80000000)
+    successors: %bb.3(0x80000000), %bb.5(0x80000000)
     liveins: $r0, $r1, $r2
 
-    $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr
+    $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr
+    t2B %bb.5, 14 /* CC::al */, $noreg
 
   bb.3:
-    successors: %bb.3(0x7c000000), %bb.1(0x04000000)
+    successors: %bb.1(0x7c000000), %bb.4(0x04000000)
     liveins: $lr, $r1, $r2
 
-    renamable $r0 = tLDRi renamable $r2, 0, 14 /* CC::al */, $noreg
-    tSTRi killed renamable $r0, renamable $r1, 0, 14 /* CC::al */, $noreg
-    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.3, implicit-def dead $cpsr
+    $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr
+    t2B %bb.4, 14 /* CC::al */, $noreg
+
+  bb.4:
+    successors: %bb.1, %bb.4
+    liveins: $lr, $r1, $r2
+
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
     t2B %bb.1, 14 /* CC::al */, $noreg
 
+  bb.5:
+    successors: %bb.5, %bb.3
+    liveins: $lr, $r1, $r2
+
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
 ...
 ---
-name:            no_preheader
+name:            no_predecessor
 body:             |
-  ; CHECK-LABEL: name: no_preheader
+  ; CHECK-LABEL: name: no_predecessor
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.2(0x30000000), %bb.1(0x50000000)
   ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $r5, $r7, killed $lr, implicit-def $sp, implicit $sp
@@ -518,26 +365,26 @@ body:             |
   ; CHECK:   tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   t2Bcc %bb.2, 11 /* CC::lt */, killed $cpsr
   ; CHECK: bb.4:
-  ; CHECK:   successors: %bb.6(0x80000000)
+  ; CHECK:   successors: %bb.5(0x80000000)
   ; CHECK:   liveins: $r0, $r1, $r3, $r8, $r9, $r12
   ; CHECK:   renamable $r4, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
   ; CHECK:   $r10 = tMOVr $r12, 14 /* CC::al */, $noreg
   ; CHECK:   $r2 = tMOVr $r3, 14 /* CC::al */, $noreg
-  ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
-  ; CHECK: bb.6:
-  ; CHECK:   successors: %bb.7(0x50000000), %bb.5(0x30000000)
+  ; CHECK:   t2B %bb.5, 14 /* CC::al */, $noreg
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.7(0x50000000), %bb.6(0x30000000)
   ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
-  ; CHECK:   renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.5, implicit-def dead $cpsr
+  ; CHECK:   renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.6, implicit-def dead $cpsr
   ; CHECK:   t2B %bb.7, 14 /* CC::al */, $noreg
-  ; CHECK: bb.5:
-  ; CHECK:   successors: %bb.2(0x04000000), %bb.6(0x7c000000)
+  ; CHECK: bb.6:
+  ; CHECK:   successors: %bb.2(0x04000000), %bb.5(0x7c000000)
   ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
   ; CHECK:   renamable $r4, dead $cpsr = nuw nsw tADDi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 1, 14 /* CC::al */, $noreg
   ; CHECK:   tCMPr renamable $r4, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
   ; CHECK:   renamable $r10 = t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr
-  ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
+  ; CHECK:   t2B %bb.5, 14 /* CC::al */, $noreg
   ; CHECK: bb.7:
   ; CHECK:   successors: %bb.8(0x80000000)
   ; CHECK:   liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
@@ -545,11 +392,11 @@ body:             |
   ; CHECK:   $r6 = tMOVr $r2, 14 /* CC::al */, $noreg
   ; CHECK:   t2B %bb.8, 14 /* CC::al */, $noreg
   ; CHECK: bb.8:
-  ; CHECK:   successors: %bb.8(0x7c000000), %bb.5(0x04000000)
+  ; CHECK:   successors: %bb.8(0x7c000000), %bb.6(0x04000000)
   ; CHECK:   liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r8, $r9, $r10, $r12
   ; CHECK:   tSTRi killed $r0, $r1, 0, 14 /* CC::al */, $noreg
   ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.8, implicit-def dead $cpsr
-  ; CHECK:   t2B %bb.5, 14 /* CC::al */, $noreg
+  ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
   ; CHECK: bb.9:
   ; CHECK:   $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc
   bb.0:
@@ -638,3 +485,184 @@ body:             |
     $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc
 
 ...
+---
+name:            backwards_branch_forwards_wls
+body:             |
+  ; CHECK-LABEL: name: backwards_branch_forwards_wls
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2IT 11, 8, implicit-def $itstate
+  ; CHECK:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
+  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.2, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.5, 14 /* CC::al */, $noreg
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK: bb.3:
+  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.5(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.2, 14 /* CC::al */, $noreg
+  bb.0:
+    successors: %bb.2
+    liveins: $r0, $r1, $r2, $lr
+
+    tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2IT 11, 8, implicit-def $itstate
+    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
+
+  bb.2:
+    successors: %bb.3, %bb.5
+    liveins: $r0, $r1, $r2
+
+    $lr = t2WhileLoopStartLR killed renamable $r0, %bb.3, implicit-def dead $cpsr
+    t2B %bb.5, 14 /* CC::al */, $noreg
+
+  bb.1:
+    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
+
+  bb.3:
+    successors: %bb.4, %bb.1
+    liveins: $lr, $r1, $r2
+
+    $lr = t2WhileLoopStartLR killed renamable $r0, %bb.1, implicit-def dead $cpsr
+    t2B %bb.4, 14 /* CC::al */, $noreg
+
+  bb.4:
+    successors: %bb.1, %bb.4
+    liveins: $lr, $r1, $r2
+
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
+    t2B %bb.1, 14 /* CC::al */, $noreg
+
+  bb.5:
+    successors: %bb.5, %bb.3
+    liveins: $lr, $r1, $r2
+
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+...
+---
+name:            multiple_predecessors
+body:             |
+  ; CHECK-LABEL: name: multiple_predecessors
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.3(0x55555555), %bb.2(0x2aaaaaab)
+  ; CHECK:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
+  ; CHECK:   frame-setup CFI_INSTRUCTION offset $r7, -8
+  ; CHECK:   $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
+  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 12
+  ; CHECK:   tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK:   t2IT 0, 8, implicit-def $itstate
+  ; CHECK:   tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+  ; CHECK:   t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.6(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.3, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.6, 14 /* CC::al */, $noreg
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK:   renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.3, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK: bb.3:
+  ; CHECK:   $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
+  ; CHECK:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0
+  ; CHECK: bb.4:
+  ; CHECK:   successors: %bb.5(0x80000000)
+  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+  ; CHECK:   renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   renamable $r0 = IMPLICIT_DEF
+  ; CHECK: bb.5:
+  ; CHECK:   successors: %bb.5(0x7c000000), %bb.1(0x04000000)
+  ; CHECK:   renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0
+  ; CHECK:   MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
+  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.5, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.1, 14 /* CC::al */, $noreg
+  ; CHECK: bb.6:
+  ; CHECK:   successors: %bb.7(0x80000000)
+  ; CHECK:   renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+  ; CHECK: bb.7:
+  ; CHECK:   successors: %bb.7(0x7c000000), %bb.3(0x04000000)
+  ; CHECK:   renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0
+  ; CHECK:   MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
+  ; CHECK:   renamable $lr = t2LoopEndDec killed renamable $lr, %bb.7, implicit-def dead $cpsr
+  ; CHECK:   t2B %bb.3, 14 /* CC::al */, $noreg
+  bb.0:
+    successors: %bb.7(0x80000000), %bb.1(0x40000000)
+    liveins: $r0, $r1, $r2, $r7, $lr
+
+    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+    frame-setup CFI_INSTRUCTION def_cfa_offset 8
+    frame-setup CFI_INSTRUCTION offset $lr, -4
+    frame-setup CFI_INSTRUCTION offset $r7, -8
+    $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
+    frame-setup CFI_INSTRUCTION def_cfa_offset 12
+    tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2IT 0, 8, implicit-def $itstate
+    tCMPi8 killed renamable $r1, 8, 0 /* CC::eq */, killed $cpsr, implicit-def $cpsr, implicit killed $itstate
+    t2Bcc %bb.1, 0 /* CC::eq */, killed $cpsr
+
+  bb.7:
+    $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
+    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, implicit undef $r0
+
+  bb.1:
+    successors: %bb.3(0x40000000), %bb.7(0x40000000)
+    liveins: $r2
+
+    renamable $lr = t2WhileLoopStartLR renamable $r2, %bb.7, implicit-def dead $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.3:
+    successors: %bb.4(0x80000000)
+    liveins: $lr, $r2
+
+    renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+    renamable $r1 = t2ADDri $sp, 2, 14 /* CC::al */, $noreg, $noreg
+    renamable $r0 = IMPLICIT_DEF
+
+  bb.4:
+    successors: %bb.4(0x7c000000), %bb.2(0x04000000)
+    liveins: $lr, $r0, $r1, $r2, $r3
+
+    renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 1, 0, $noreg, undef renamable $q0
+    MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.4, implicit-def dead $cpsr
+    t2B %bb.2, 14 /* CC::al */, $noreg
+
+  bb.2:
+    successors: %bb.5(0x40000000), %bb.7(0x40000000)
+    liveins: $r0, $r1, $r2
+
+    renamable $lr = t2WhileLoopStartLR killed renamable $r2, %bb.7, implicit-def dead $cpsr
+    t2B %bb.5, 14 /* CC::al */, $noreg
+
+  bb.5:
+    successors: %bb.6(0x80000000)
+    liveins: $lr, $r0, $r1
+
+    renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+
+  bb.6:
+    successors: %bb.6(0x7c000000), %bb.7(0x04000000)
+    liveins: $lr, $r0, $r1, $r3
+
+    renamable $q0, renamable $r0 = MVE_VIWDUPu16 killed renamable $r0, renamable $r3, 2, 0, $noreg, undef renamable $q0
+    MVE_VSTRH16_rq undef renamable $q0, renamable $r1, killed renamable $q0, 0, $noreg
+    renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr
+    t2B %bb.7, 14 /* CC::al */, $noreg
+
+...

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index 7f5acd19e0b35..cc8a3b36c8305 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1077,10 +1077,23 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
 ; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    b .LBB16_3
-; CHECK-NEXT:  .LBB16_3: @ %while.body
+; CHECK-NEXT:    b .LBB16_5
+; CHECK-NEXT:  .LBB16_3: @ %for.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    wls lr, r0, .LBB16_4
+; CHECK-NEXT:    b .LBB16_9
+; CHECK-NEXT:  .LBB16_4: @ %while.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    subs.w r12, r12, #1
+; CHECK-NEXT:    vstrb.8 q0, [r2], #8
+; CHECK-NEXT:    add.w r0, r5, r0, lsl #1
+; CHECK-NEXT:    add.w r5, r0, #8
+; CHECK-NEXT:    beq.w .LBB16_12
+; CHECK-NEXT:  .LBB16_5: @ %while.body
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB16_5 Depth 2
+; CHECK-NEXT:    @ Child Loop BB16_7 Depth 2
 ; CHECK-NEXT:    @ Child Loop BB16_10 Depth 2
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    ldrh.w lr, [r3, #14]
@@ -1117,14 +1130,14 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
 ; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vfma.f16 q0, q1, lr
 ; CHECK-NEXT:    cmp r0, #16
-; CHECK-NEXT:    blo .LBB16_6
-; CHECK-NEXT:  @ %bb.4: @ %for.body.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    blo .LBB16_8
+; CHECK-NEXT:  @ %bb.6: @ %for.body.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    dls lr, r0
 ; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:  .LBB16_5: @ %for.body
-; CHECK-NEXT:    @ Parent Loop BB16_3 Depth=1
+; CHECK-NEXT:  .LBB16_7: @ %for.body
+; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldrh r0, [r6], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r5]
@@ -1155,39 +1168,26 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
 ; CHECK-NEXT:    adds r5, #16
 ; CHECK-NEXT:    vfma.f16 q0, q1, r4
-; CHECK-NEXT:    le lr, .LBB16_5
-; CHECK-NEXT:    b .LBB16_7
-; CHECK-NEXT:  .LBB16_6: @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    le lr, .LBB16_7
+; CHECK-NEXT:    b .LBB16_3
+; CHECK-NEXT:  .LBB16_8: @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:  .LBB16_7: @ %for.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    wls lr, r0, .LBB16_8
-; CHECK-NEXT:    b .LBB16_9
-; CHECK-NEXT:  .LBB16_8: @ %while.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    subs.w r12, r12, #1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #8
-; CHECK-NEXT:    add.w r0, r5, r0, lsl #1
-; CHECK-NEXT:    add.w r5, r0, #8
-; CHECK-NEXT:    beq .LBB16_12
 ; CHECK-NEXT:    b .LBB16_3
 ; CHECK-NEXT:  .LBB16_9: @ %while.body76.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    mov r0, r5
 ; CHECK-NEXT:  .LBB16_10: @ %while.body76
-; CHECK-NEXT:    @ Parent Loop BB16_3 Depth=1
+; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldrh r4, [r6], #2
 ; CHECK-NEXT:    vldrh.u16 q1, [r0], #2
 ; CHECK-NEXT:    vfma.f16 q0, q1, r4
 ; CHECK-NEXT:    le lr, .LBB16_10
 ; CHECK-NEXT:  @ %bb.11: @ %while.end.loopexit
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    add.w r5, r5, r0, lsl #1
-; CHECK-NEXT:    b .LBB16_8
+; CHECK-NEXT:    b .LBB16_4
 ; CHECK-NEXT:  .LBB16_12: @ %if.end
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}

diff  --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 19a710974548d..db57c913b90e0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1071,10 +1071,24 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
 ; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    b .LBB16_3
-; CHECK-NEXT:  .LBB16_3: @ %while.body
+; CHECK-NEXT:    b .LBB16_5
+; CHECK-NEXT:  .LBB16_3: @ %for.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload
+; CHECK-NEXT:    wls lr, r0, .LBB16_4
+; CHECK-NEXT:    b .LBB16_9
+; CHECK-NEXT:  .LBB16_4: @ %while.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    subs.w r12, r12, #1
+; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    add.w r0, r4, r0, lsl #2
+; CHECK-NEXT:    add.w r4, r0, #16
+; CHECK-NEXT:    beq .LBB16_12
+; CHECK-NEXT:  .LBB16_5: @ %while.body
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB16_5 Depth 2
+; CHECK-NEXT:    @ Child Loop BB16_7 Depth 2
 ; CHECK-NEXT:    @ Child Loop BB16_10 Depth 2
 ; CHECK-NEXT:    add.w lr, r10, #8
 ; CHECK-NEXT:    vldrw.u32 q0, [r1], #16
@@ -1101,14 +1115,14 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
 ; CHECK-NEXT:    vfma.f32 q0, q3, r11
 ; CHECK-NEXT:    cmp r0, #16
 ; CHECK-NEXT:    vfma.f32 q0, q1, r8
-; CHECK-NEXT:    blo .LBB16_6
-; CHECK-NEXT:  @ %bb.4: @ %for.body.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    blo .LBB16_8
+; CHECK-NEXT:  @ %bb.6: @ %for.body.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    dls lr, r0
 ; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:  .LBB16_5: @ %for.body
-; CHECK-NEXT:    @ Parent Loop BB16_3 Depth=1
+; CHECK-NEXT:  .LBB16_7: @ %for.body
+; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldm.w r7, {r0, r3, r5, r6, r8, r11}
 ; CHECK-NEXT:    vldrw.u32 q1, [r4], #32
@@ -1129,40 +1143,26 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
 ; CHECK-NEXT:    vfma.f32 q0, q2, r11
 ; CHECK-NEXT:    vfma.f32 q0, q3, r9
 ; CHECK-NEXT:    vfma.f32 q0, q1, r1
-; CHECK-NEXT:    le lr, .LBB16_5
-; CHECK-NEXT:    b .LBB16_7
-; CHECK-NEXT:  .LBB16_6: @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    le lr, .LBB16_7
+; CHECK-NEXT:    b .LBB16_3
+; CHECK-NEXT:  .LBB16_8: @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:  .LBB16_7: @ %for.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
-; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    wls lr, r0, .LBB16_8
-; CHECK-NEXT:    b .LBB16_9
-; CHECK-NEXT:  .LBB16_8: @ %while.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    subs.w r12, r12, #1
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
-; CHECK-NEXT:    add.w r0, r4, r0, lsl #2
-; CHECK-NEXT:    add.w r4, r0, #16
-; CHECK-NEXT:    beq .LBB16_12
 ; CHECK-NEXT:    b .LBB16_3
 ; CHECK-NEXT:  .LBB16_9: @ %while.body76.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    mov r3, r4
 ; CHECK-NEXT:  .LBB16_10: @ %while.body76
-; CHECK-NEXT:    @ Parent Loop BB16_3 Depth=1
+; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldr r0, [r7], #4
 ; CHECK-NEXT:    vldrw.u32 q1, [r3], #4
 ; CHECK-NEXT:    vfma.f32 q0, q1, r0
 ; CHECK-NEXT:    le lr, .LBB16_10
 ; CHECK-NEXT:  @ %bb.11: @ %while.end.loopexit
-; CHECK-NEXT:    @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    add.w r4, r4, r0, lsl #2
-; CHECK-NEXT:    b .LBB16_8
+; CHECK-NEXT:    b .LBB16_4
 ; CHECK-NEXT:  .LBB16_12: @ %if.end
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}


        


More information about the llvm-commits mailing list