[llvm] 58f3201 - [ARM] Updates to arm-block-placement pass
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 12 06:46:34 PDT 2021
Author: Malhar Jajoo
Date: 2021-04-12T14:46:23+01:00
New Revision: 58f3201a20f7c51393ef4509b69515cb9c4b32bf
URL: https://github.com/llvm/llvm-project/commit/58f3201a20f7c51393ef4509b69515cb9c4b32bf
DIFF: https://github.com/llvm/llvm-project/commit/58f3201a20f7c51393ef4509b69515cb9c4b32bf.diff
LOG: [ARM] Updates to arm-block-placement pass
The patch makes two updates to the arm-block-placement pass:
- Handle arbitrarily nested loops
- Extends the search (for t2WhileLoopStartLR) to the predecessor of the
preHeader.
Differential Revision: https://reviews.llvm.org/D99649
Added:
Modified:
llvm/lib/Target/ARM/ARMBlockPlacement.cpp
llvm/test/CodeGen/Thumb2/block-placement.mir
llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index 155a6cb18d85d..57914b3b3d618 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -38,6 +38,8 @@ class ARMBlockPlacement : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After);
bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
+ bool fixBackwardsWLS(MachineLoop *ML);
+ bool processPostOrderLoops(MachineLoop *ML);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -57,9 +59,135 @@ char ARMBlockPlacement::ID = 0;
INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false,
false)
+static MachineInstr *findWLSInBlock(MachineBasicBlock *MBB) {
+ for (auto &Terminator : MBB->terminators()) {
+ if (Terminator.getOpcode() == ARM::t2WhileLoopStartLR)
+ return &Terminator;
+ }
+ return nullptr; // No t2WhileLoopStartLR among MBB's terminators.
+}
+
+/// Find t2WhileLoopStartLR in the loop predecessor BB or otherwise in its only
+/// predecessor. Returns the WLS instruction if found, otherwise nullptr.
+static MachineInstr *findWLS(MachineLoop *ML) {
+ MachineBasicBlock *Predecessor = ML->getLoopPredecessor();
+ if (!Predecessor)
+ return nullptr;
+ MachineInstr *WlsInstr = findWLSInBlock(Predecessor);
+ if (WlsInstr)
+ return WlsInstr;
+ if (Predecessor->pred_size() == 1)
+ return findWLSInBlock(*Predecessor->pred_begin());
+ return nullptr;
+}
+
+/// Checks if loop has a backwards branching WLS, and if possible, fixes it.
+/// This requires checking the preheader (or its predecessor) for a WLS and if
+/// its target is before it.
+/// If moving the target block wouldn't produce another backwards WLS or a new
+/// forwards LE branch, then move the target block after the preheader (or its
+/// predecessor).
+bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
+ MachineInstr *WlsInstr = findWLS(ML);
+ if (!WlsInstr)
+ return false;
+
+ MachineBasicBlock *Predecessor = WlsInstr->getParent();
+ MachineBasicBlock *LoopExit = WlsInstr->getOperand(2).getMBB();
+ // We don't want to move the function's entry block.
+ if (!LoopExit->getPrevNode())
+ return false;
+ if (blockIsBefore(Predecessor, LoopExit))
+ return false;
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
+ << Predecessor->getFullName() << " to "
+ << LoopExit->getFullName() << "\n");
+
+ // Make sure that moving the target block doesn't cause any of its WLSs
+ // that were previously not backwards to become backwards
+ bool CanMove = true;
+ MachineInstr *WlsInLoopExit = findWLSInBlock(LoopExit);
+ if (WlsInLoopExit) {
+ // An example loop structure where the LoopExit can't be moved, since
+ // bb1's WLS will become backwards once it's moved after bb3
+ // bb1: - LoopExit
+ // WLS bb2
+ // bb2: - LoopExit2
+ // ...
+ // bb3: - Predecessor
+ // WLS bb1
+ // bb4: - Header
+ MachineBasicBlock *LoopExit2 = WlsInLoopExit->getOperand(2).getMBB();
+ // If the WLS from LoopExit to LoopExit2 is already backwards then
+ // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
+ // after the Predecessor then moving will keep it as a forward branch, so it
+ // can be moved. If LoopExit2 is between the Predecessor and LoopExit then
+ // moving LoopExit will make it a backwards branch, so it can't be moved
+ // since we'd fix one and introduce one backwards branch.
+ // TODO: Analyse the blocks to make a decision if it would be worth
+ // moving LoopExit even if LoopExit2 is between the Predecessor and
+ // LoopExit.
+ if (!blockIsBefore(LoopExit2, LoopExit) &&
+ (LoopExit2 == Predecessor || blockIsBefore(LoopExit2, Predecessor))) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Can't move the target block as it would "
+ "introduce a new backwards WLS branch\n");
+ CanMove = false;
+ }
+ }
+
+ if (CanMove) {
+ // Make sure no LEs become forwards.
+ // An example loop structure where the LoopExit can't be moved, since
+ // bb2's LE will become forwards once bb1 is moved after bb3.
+ // bb1: - LoopExit
+ // bb2:
+ // LE bb1 - Terminator
+ // bb3: - Predecessor
+ // WLS bb1
+ // bb4: - Header
+ for (auto It = LoopExit->getIterator(); It != Predecessor->getIterator();
+ It++) {
+ MachineBasicBlock *MBB = &*It;
+ for (auto &Terminator : MBB->terminators()) {
+ if (Terminator.getOpcode() != ARM::t2LoopEnd &&
+ Terminator.getOpcode() != ARM::t2LoopEndDec)
+ continue;
+ MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
+ // The LE will become forwards branching if it branches to LoopExit
+ // which isn't allowed by the architecture, so we should avoid
+ // introducing these.
+ // TODO: Analyse the blocks to make a decision if it would be worth
+ // moving LoopExit even if we'd introduce a forwards LE
+ if (LETarget == LoopExit) {
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Can't move the target block as it would "
+ "introduce a new forwards LE branch\n");
+ CanMove = false;
+ break; // NOTE(review): exits only the terminator loop; the outer block scan keeps running.
+ }
+ }
+ }
+ }
+
+ if (CanMove)
+ moveBasicBlock(LoopExit, Predecessor);
+
+ return CanMove;
+}
+
+/// Fixes the ordering of WLS blocks and their loop exits, visiting the loops
+/// nested inside ML before ML itself. Returns true if any block was moved.
+bool ARMBlockPlacement::processPostOrderLoops(MachineLoop *ML) {
+ bool Changed = false;
+ for (auto *InnerML : *ML)
+ Changed |= processPostOrderLoops(InnerML);
+ return Changed | fixBackwardsWLS(ML); // '|' (not '||'): ML is always processed.
+}
+
bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
- return false;
+ return false;
const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(MF.getSubtarget());
if (!ST.hasLOB())
return false;
@@ -72,109 +200,9 @@ bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BBUtils->adjustBBOffsetsAfter(&MF.front());
bool Changed = false;
- // Find loops with a backwards branching WLS.
- // This requires looping over the loops in the function, checking each
- // preheader for a WLS and if its target is before the preheader. If moving
- // the target block wouldn't produce another backwards WLS or a new forwards
- // LE branch then move the target block after the preheader.
- for (auto *ML : *MLI) {
- MachineBasicBlock *Preheader = ML->getLoopPredecessor();
- if (!Preheader)
- continue;
-
- for (auto &Terminator : Preheader->terminators()) {
- if (Terminator.getOpcode() != ARM::t2WhileLoopStartLR)
- continue;
- MachineBasicBlock *LoopExit = Terminator.getOperand(2).getMBB();
- // We don't want to move the function's entry block.
- if (!LoopExit->getPrevNode())
- continue;
- if (blockIsBefore(Preheader, LoopExit))
- continue;
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from "
- << Preheader->getFullName() << " to "
- << LoopExit->getFullName() << "\n");
-
- // Make sure that moving the target block doesn't cause any of its WLSs
- // that were previously not backwards to become backwards
- bool CanMove = true;
- for (auto &LoopExitTerminator : LoopExit->terminators()) {
- if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStartLR)
- continue;
- // An example loop structure where the LoopExit can't be moved, since
- // bb1's WLS will become backwards once it's moved after bb3 bb1: -
- // LoopExit
- // WLS bb2 - LoopExit2
- // bb2:
- // ...
- // bb3: - Preheader
- // WLS bb1
- // bb4: - Header
- MachineBasicBlock *LoopExit2 =
- LoopExitTerminator.getOperand(2).getMBB();
- // If the WLS from LoopExit to LoopExit2 is already backwards then
- // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is
- // after the Preheader then moving will keep it as a forward branch, so
- // it can be moved. If LoopExit2 is between the Preheader and LoopExit
- // then moving LoopExit will make it a backwards branch, so it can't be
- // moved since we'd fix one and introduce one backwards branch.
- // TODO: Analyse the blocks to make a decision if it would be worth
- // moving LoopExit even if LoopExit2 is between the Preheader and
- // LoopExit.
- if (!blockIsBefore(LoopExit2, LoopExit) &&
- (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) {
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX
- << "Can't move the target block as it would "
- "introduce a new backwards WLS branch\n");
- CanMove = false;
- break;
- }
- }
-
- if (CanMove) {
- // Make sure no LEs become forwards.
- // An example loop structure where the LoopExit can't be moved, since
- // bb2's LE will become forwards once bb1 is moved after bb3.
- // bb1: - LoopExit
- // bb2:
- // LE bb1 - Terminator
- // bb3: - Preheader
- // WLS bb1
- // bb4: - Header
- for (auto It = LoopExit->getIterator(); It != Preheader->getIterator();
- It++) {
- MachineBasicBlock *MBB = &*It;
- for (auto &Terminator : MBB->terminators()) {
- if (Terminator.getOpcode() != ARM::t2LoopEnd &&
- Terminator.getOpcode() != ARM::t2LoopEndDec)
- continue;
- MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB();
- // The LE will become forwards branching if it branches to LoopExit
- // which isn't allowed by the architecture, so we should avoid
- // introducing these.
- // TODO: Analyse the blocks to make a decision if it would be worth
- // moving LoopExit even if we'd introduce a forwards LE
- if (LETarget == LoopExit) {
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX
- << "Can't move the target block as it would "
- "introduce a new forwards LE branch\n");
- CanMove = false;
- break;
- }
- }
- }
-
- if (!CanMove)
- break;
- }
-
- if (CanMove) {
- moveBasicBlock(LoopExit, Preheader);
- Changed = true;
- break;
- }
- }
- }
+ // Find loops with a backwards branching WLS and fix if possible.
+ for (auto *ML : *MLI)
+ Changed |= processPostOrderLoops(ML);
return Changed;
}
@@ -184,6 +212,8 @@ bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB,
return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB);
}
+/// Moves a given MBB to be positioned after another MBB while maintaining
+/// existing control flow
void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
MachineBasicBlock *After) {
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after "
@@ -195,6 +225,9 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
BB->moveAfter(After);
+ // Since only the blocks are to be moved around (but the control flow must
+ // not change), if there were any fall-throughs (to/from adjacent blocks),
+ // replace with unconditional branch to the fall through block.
auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) {
LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from "
<< From->getName() << " to " << To->getName() << "\n");
diff --git a/llvm/test/CodeGen/Thumb2/block-placement.mir b/llvm/test/CodeGen/Thumb2/block-placement.mir
index 855895b45ee63..9f40817e0c423 100644
--- a/llvm/test/CodeGen/Thumb2/block-placement.mir
+++ b/llvm/test/CodeGen/Thumb2/block-placement.mir
@@ -1,16 +1,19 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -run-pass=arm-block-placement %s -o - | FileCheck %s
--- |
+ ; Checks that loopExitBlock gets moved (in forward direction) if there is a backwards WLS to it.
define void @backwards_branch(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
entry:
unreachable
}
+ ; Checks that loopExitBlock does not get reordered (since it is entry block) even if there is a backwards WLS to it.
define void @backwards_branch_entry_block(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
entry:
unreachable
}
+ ; Checks that loopExitBlock (containing a backwards WLS) is moved (in forward direction) if there is a backwards WLS to it.
define void @backwards_branch_target_already_backwards(i32 %N, i32* nocapture %a, i32* nocapture readonly %b) local_unnamed_addr #0 {
entry:
unreachable
@@ -21,16 +24,25 @@
unreachable
}
+ ; Checks that loopExitBlock (to which a backwards LE exists) is not moved if moving it would cause the LE to become forwards branching.
define void @backwards_branch_forwards_le(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
entry:
unreachable
}
+ ; Checks that a MachineFunction is unaffected if it doesn't contain any WLS (pseudo) instruction.
define void @no_preheader(i32 %N, i32 %M, i32* nocapture %a, i32* nocapture %b, i32* nocapture %c) local_unnamed_addr #0 {
entry:
unreachable
}
+ ; Within a nested loop, checks that loopExit gets moved (in forward direction) if there exists a backwards WLS to it.
+ ; Both the WLS and loopExit are at depth=3.
+ define void @nested_loops(i32 %n, i32 %m, i32 %l, i8* noalias %X, i8* noalias %Y) local_unnamed_addr #0 {
+ entry:
+ unreachable
+ }
+
declare dso_local i32 @g(...) local_unnamed_addr #1
declare dso_local i32 @h(...) local_unnamed_addr #1
@@ -441,3 +453,188 @@ body: |
bb.5:
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $r5, def $r7, def $pc
...
+---
+name: nested_loops
+alignment: 4
+tracksRegLiveness: true
+liveins:
+ - { reg: '$r0' }
+ - { reg: '$r1' }
+ - { reg: '$r2' }
+ - { reg: '$r3' }
+frameInfo:
+ stackSize: 32
+ maxAlignment: 4
+ maxCallFrameSize: 0
+fixedStack:
+ - { id: 0, size: 4, alignment: 8, isImmutable: true }
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr',
+ callee-saved-restored: false }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r10' }
+ - { id: 2, type: spill-slot, offset: -12, size: 4, alignment: 4, callee-saved-register: '$r9' }
+ - { id: 3, type: spill-slot, offset: -16, size: 4, alignment: 4, callee-saved-register: '$r8' }
+ - { id: 4, type: spill-slot, offset: -20, size: 4, alignment: 4, callee-saved-register: '$r7' }
+ - { id: 5, type: spill-slot, offset: -24, size: 4, alignment: 4, callee-saved-register: '$r6' }
+ - { id: 6, type: spill-slot, offset: -28, size: 4, alignment: 4, callee-saved-register: '$r5' }
+ - { id: 7, type: spill-slot, offset: -32, size: 4, alignment: 4, callee-saved-register: '$r4' }
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: nested_loops
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $lr
+ ; CHECK: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $lr
+ ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r10, -8
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r9, -12
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r8, -16
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -20
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r6, -24
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -28
+ ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -32
+ ; CHECK: tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: t2IT 11, 8, implicit-def $itstate
+ ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 11 /* CC::lt */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc, implicit killed $itstate
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: liveins: $r0, $r1, $r2, $r3
+ ; CHECK: renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
+ ; CHECK: $r9 = tMOVr killed $r2, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.9(0x04000000), %bb.3(0x7c000000)
+ ; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12
+ ; CHECK: renamable $r8 = nuw nsw t2ADDri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+ ; CHECK: tCMPhir renamable $r8, renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x50000000), %bb.2(0x30000000)
+ ; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12
+ ; CHECK: tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, killed $cpsr
+ ; CHECK: bb.4:
+ ; CHECK: successors: %bb.6(0x80000000)
+ ; CHECK: liveins: $r0, $r1, $r3, $r8, $r9, $r12
+ ; CHECK: renamable $r4, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+ ; CHECK: $r10 = tMOVr $r12, 14 /* CC::al */, $noreg
+ ; CHECK: $r2 = tMOVr $r3, 14 /* CC::al */, $noreg
+ ; CHECK: t2B %bb.6, 14 /* CC::al */, $noreg
+ ; CHECK: bb.6:
+ ; CHECK: successors: %bb.7(0x50000000), %bb.5(0x30000000)
+ ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
+ ; CHECK: renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.5, implicit-def dead $cpsr
+ ; CHECK: tB %bb.7, 14 /* CC::al */, $noreg
+ ; CHECK: bb.5:
+ ; CHECK: successors: %bb.2(0x04000000), %bb.6(0x7c000000)
+ ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
+ ; CHECK: renamable $r4, dead $cpsr = nuw nsw tADDi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 1, 14 /* CC::al */, $noreg
+ ; CHECK: tCMPr renamable $r4, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK: renamable $r10 = t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK: t2Bcc %bb.2, 0 /* CC::eq */, killed $cpsr
+ ; CHECK: tB %bb.6, 14 /* CC::al */, $noreg
+ ; CHECK: bb.7:
+ ; CHECK: successors: %bb.8(0x80000000)
+ ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
+ ; CHECK: $r5 = tMOVr $r10, 14 /* CC::al */, $noreg
+ ; CHECK: $r6 = tMOVr $r2, 14 /* CC::al */, $noreg
+ ; CHECK: t2B %bb.8, 14 /* CC::al */, $noreg
+ ; CHECK: bb.8:
+ ; CHECK: successors: %bb.8(0x7c000000), %bb.5(0x04000000)
+ ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r8, $r9, $r10, $r12
+ ; CHECK: tSTRi killed $r0, $r1, 0, 14 /* CC::al */, $noreg
+ ; CHECK: renamable $lr = t2LoopEndDec killed renamable $lr, %bb.8, implicit-def dead $cpsr
+ ; CHECK: t2B %bb.5, 14 /* CC::al */, $noreg
+ ; CHECK: bb.9:
+ ; CHECK: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc
+ bb.0:
+ successors: %bb.1
+ liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 32
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r10, -8
+ frame-setup CFI_INSTRUCTION offset $r9, -12
+ frame-setup CFI_INSTRUCTION offset $r8, -16
+ frame-setup CFI_INSTRUCTION offset $r7, -20
+ frame-setup CFI_INSTRUCTION offset $r6, -24
+ frame-setup CFI_INSTRUCTION offset $r5, -28
+ frame-setup CFI_INSTRUCTION offset $r4, -32
+ tCMPi8 renamable $r0, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2IT 11, 8, implicit-def $itstate
+ $sp = frame-destroy t2LDMIA_RET $sp, 11 /* CC::lt */, killed $cpsr, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc, implicit killed $itstate
+
+ bb.1:
+ liveins: $r0, $r1, $r2, $r3
+
+ renamable $r12 = t2LDRi12 $sp, 32, 14 /* CC::al */, $noreg :: (load 4 from %fixed-stack.0, align 8)
+ $r9 = tMOVr killed $r2, 14 /* CC::al */, $noreg
+ renamable $r8 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.2, 14 /* CC::al */, $noreg
+
+ bb.8:
+ successors: %bb.9(0x04000000), %bb.2(0x7c000000)
+ liveins: $r0, $r1, $r3, $r8, $r9, $r12
+
+ renamable $r8 = nuw nsw t2ADDri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
+ renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 1, 14 /* CC::al */, $noreg
+ tCMPhir renamable $r8, renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ renamable $r12 = t2ADDri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg
+ t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr
+
+ bb.2:
+ successors: %bb.3(0x50000000), %bb.8(0x30000000)
+ liveins: $r0, $r1, $r3, $r8, $r9, $r12
+
+ tCMPi8 renamable $r1, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.8, 11 /* CC::lt */, killed $cpsr
+
+ bb.3:
+ liveins: $r0, $r1, $r3, $r8, $r9, $r12
+
+ renamable $r4, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
+ $r10 = tMOVr $r12, 14 /* CC::al */, $noreg
+ $r2 = tMOVr $r3, 14 /* CC::al */, $noreg
+ t2B %bb.4, 14 /* CC::al */, $noreg
+
+ bb.7:
+ successors: %bb.8(0x04000000), %bb.4(0x7c000000)
+ liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
+
+ renamable $r4, dead $cpsr = nuw nsw tADDi8 killed renamable $r4, 1, 14 /* CC::al */, $noreg
+ renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 1, 14 /* CC::al */, $noreg
+ tCMPr renamable $r4, renamable $r1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ renamable $r10 = t2ADDri killed renamable $r10, 1, 14 /* CC::al */, $noreg, $noreg
+ t2Bcc %bb.8, 0 /* CC::eq */, killed $cpsr
+
+ bb.4:
+ successors: %bb.5(0x50000000), %bb.7(0x30000000)
+ liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
+
+ renamable $lr = t2WhileLoopStartLR killed renamable $r9, %bb.7, implicit-def dead $cpsr
+
+ bb.5:
+ liveins: $r0, $r1, $r2, $r3, $r4, $r8, $r9, $r10, $r12
+
+ $r5 = tMOVr $r10, 14 /* CC::al */, $noreg
+ $r6 = tMOVr $r2, 14 /* CC::al */, $noreg
+ t2B %bb.6, 14 /* CC::al */, $noreg
+
+ bb.6:
+ successors: %bb.6(0x7c000000), %bb.7(0x04000000)
+ liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r8, $r9, $r10, $r12
+
+ tSTRi killed $r0, $r1, 0, 14 /* CC::al */, $noreg
+ renamable $lr = t2LoopEndDec killed renamable $lr, %bb.6, implicit-def dead $cpsr
+ t2B %bb.7, 14 /* CC::al */, $noreg
+
+ bb.9:
+ $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $pc
+
+...
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index dd8c4f110691d..7f5acd19e0b35 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1077,18 +1077,10 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: b .LBB16_4
-; CHECK-NEXT: .LBB16_3: @ %while.end
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
-; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: subs.w r12, r12, #1
-; CHECK-NEXT: vstrb.8 q0, [r2], #8
-; CHECK-NEXT: add.w r0, r5, r0, lsl #1
-; CHECK-NEXT: add.w r5, r0, #8
-; CHECK-NEXT: beq.w .LBB16_12
-; CHECK-NEXT: .LBB16_4: @ %while.body
+; CHECK-NEXT: b .LBB16_3
+; CHECK-NEXT: .LBB16_3: @ %while.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NEXT: @ Child Loop BB16_6 Depth 2
+; CHECK-NEXT: @ Child Loop BB16_5 Depth 2
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: ldrh.w lr, [r3, #14]
@@ -1125,14 +1117,14 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vfma.f16 q0, q1, lr
; CHECK-NEXT: cmp r0, #16
-; CHECK-NEXT: blo .LBB16_7
-; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: blo .LBB16_6
+; CHECK-NEXT: @ %bb.4: @ %for.body.preheader
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: .LBB16_6: @ %for.body
-; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
+; CHECK-NEXT: .LBB16_5: @ %for.body
+; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh r0, [r6], #16
; CHECK-NEXT: vldrw.u32 q1, [r5]
@@ -1163,33 +1155,39 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: adds r5, #16
; CHECK-NEXT: vfma.f16 q0, q1, r4
-; CHECK-NEXT: le lr, .LBB16_6
-; CHECK-NEXT: b .LBB16_8
-; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: le lr, .LBB16_5
+; CHECK-NEXT: b .LBB16_7
+; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: .LBB16_8: @ %for.end
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: .LBB16_7: @ %for.end
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: subs.w lr, r0, #0
-; CHECK-NEXT: beq.w .LBB16_3
+; CHECK-NEXT: wls lr, r0, .LBB16_8
; CHECK-NEXT: b .LBB16_9
+; CHECK-NEXT: .LBB16_8: @ %while.end
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: subs.w r12, r12, #1
+; CHECK-NEXT: vstrb.8 q0, [r2], #8
+; CHECK-NEXT: add.w r0, r5, r0, lsl #1
+; CHECK-NEXT: add.w r5, r0, #8
+; CHECK-NEXT: beq .LBB16_12
+; CHECK-NEXT: b .LBB16_3
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: .LBB16_10: @ %while.body76
-; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
+; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh r4, [r6], #2
; CHECK-NEXT: vldrh.u16 q1, [r0], #2
; CHECK-NEXT: vfma.f16 q0, q1, r4
-; CHECK-NEXT: subs.w lr, lr, #1
-; CHECK-NEXT: bne .LBB16_10
-; CHECK-NEXT: b .LBB16_11
-; CHECK-NEXT: .LBB16_11: @ %while.end.loopexit
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: le lr, .LBB16_10
+; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: add.w r5, r5, r0, lsl #1
-; CHECK-NEXT: b .LBB16_3
+; CHECK-NEXT: b .LBB16_8
; CHECK-NEXT: .LBB16_12: @ %if.end
; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 2fd717bf2d478..19a710974548d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1071,18 +1071,10 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT: b .LBB16_4
-; CHECK-NEXT: .LBB16_3: @ %while.end
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: subs.w r12, r12, #1
-; CHECK-NEXT: vstrb.8 q0, [r2], #16
-; CHECK-NEXT: add.w r0, r4, r0, lsl #2
-; CHECK-NEXT: add.w r4, r0, #16
-; CHECK-NEXT: beq .LBB16_12
-; CHECK-NEXT: .LBB16_4: @ %while.body
+; CHECK-NEXT: b .LBB16_3
+; CHECK-NEXT: .LBB16_3: @ %while.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NEXT: @ Child Loop BB16_6 Depth 2
+; CHECK-NEXT: @ Child Loop BB16_5 Depth 2
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
; CHECK-NEXT: add.w lr, r10, #8
; CHECK-NEXT: vldrw.u32 q0, [r1], #16
@@ -1109,14 +1101,14 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: vfma.f32 q0, q3, r11
; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: vfma.f32 q0, q1, r8
-; CHECK-NEXT: blo .LBB16_7
-; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: blo .LBB16_6
+; CHECK-NEXT: @ %bb.4: @ %for.body.preheader
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: .LBB16_6: @ %for.body
-; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
+; CHECK-NEXT: .LBB16_5: @ %for.body
+; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11}
; CHECK-NEXT: vldrw.u32 q1, [r4], #32
@@ -1137,34 +1129,40 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: vfma.f32 q0, q2, r11
; CHECK-NEXT: vfma.f32 q0, q3, r9
; CHECK-NEXT: vfma.f32 q0, q1, r1
-; CHECK-NEXT: le lr, .LBB16_6
-; CHECK-NEXT: b .LBB16_8
-; CHECK-NEXT: .LBB16_7: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: le lr, .LBB16_5
+; CHECK-NEXT: b .LBB16_7
+; CHECK-NEXT: .LBB16_6: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: .LBB16_8: @ %for.end
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: .LBB16_7: @ %for.end
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT: subs.w lr, r0, #0
-; CHECK-NEXT: beq .LBB16_3
+; CHECK-NEXT: wls lr, r0, .LBB16_8
; CHECK-NEXT: b .LBB16_9
+; CHECK-NEXT: .LBB16_8: @ %while.end
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: subs.w r12, r12, #1
+; CHECK-NEXT: vstrb.8 q0, [r2], #16
+; CHECK-NEXT: add.w r0, r4, r0, lsl #2
+; CHECK-NEXT: add.w r4, r0, #16
+; CHECK-NEXT: beq .LBB16_12
+; CHECK-NEXT: b .LBB16_3
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: .LBB16_10: @ %while.body76
-; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
+; CHECK-NEXT: @ Parent Loop BB16_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldr r0, [r7], #4
; CHECK-NEXT: vldrw.u32 q1, [r3], #4
; CHECK-NEXT: vfma.f32 q0, q1, r0
-; CHECK-NEXT: subs.w lr, lr, #1
-; CHECK-NEXT: bne .LBB16_10
-; CHECK-NEXT: b .LBB16_11
-; CHECK-NEXT: .LBB16_11: @ %while.end.loopexit
-; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
+; CHECK-NEXT: le lr, .LBB16_10
+; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
+; CHECK-NEXT: @ in Loop: Header=BB16_3 Depth=1
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: add.w r4, r4, r0, lsl #2
-; CHECK-NEXT: b .LBB16_3
+; CHECK-NEXT: b .LBB16_8
; CHECK-NEXT: .LBB16_12: @ %if.end
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
More information about the llvm-commits
mailing list