[llvm] [CodeGen] Restore MachineBlockPlacement block ordering (PR #99351)
John Brawn via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 02:58:45 PDT 2024
https://github.com/john-brawn-arm updated https://github.com/llvm/llvm-project/pull/99351
>From 1c2d4e81c69234feeafe23bc7d95d24241564e1e Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Tue, 16 Jul 2024 15:14:30 +0100
Subject: [PATCH 1/3] [CodeGen] Restore MachineBlockPlacement block ordering
PR #91843 changed the algorithm used to find the next unplaced block
so that it iterates through the blocks in BlockFilter instead of
iterating through the blocks in the function and checking if they are
in the block filter. Unfortunately this sometimes results in a
different block ordering being chosen, because the order of blocks in
BlockFilter comes from the order in MachineLoopInfo, which in some
cases differs from the order in which the blocks appear in the
function. The resulting block layout can also have worse performance.
Fix this by making collectLoopBlockSet place blocks in its output in
the same order in which they appear in the function.
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 15 +-
llvm/test/CodeGen/ARM/block-order.mir | 529 +++++++++++++++++++++
2 files changed, 538 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/ARM/block-order.mir
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 4c864ca15ccc5..0308ee22b1446 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2614,18 +2614,21 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
LoopFreq += MBFI->getBlockFreq(LoopPred) *
MBPI->getEdgeProbability(LoopPred, L.getHeader());
- for (MachineBasicBlock *LoopBB : L.getBlocks()) {
- if (LoopBlockSet.count(LoopBB))
+ for (auto &MBB : *F) {
+ if (LoopBlockSet.count(&MBB) || !L.contains(&MBB))
continue;
- auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
+ auto Freq = MBFI->getBlockFreq(&MBB).getFrequency();
if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
continue;
- BlockChain *Chain = BlockToChain[LoopBB];
+ BlockChain *Chain = BlockToChain[&MBB];
for (MachineBasicBlock *ChainBB : *Chain)
LoopBlockSet.insert(ChainBB);
}
- } else
- LoopBlockSet.insert(L.block_begin(), L.block_end());
+ } else {
+ for (auto &MBB : *F)
+ if (L.contains(&MBB))
+ LoopBlockSet.insert(&MBB);
+ }
return LoopBlockSet;
}
diff --git a/llvm/test/CodeGen/ARM/block-order.mir b/llvm/test/CodeGen/ARM/block-order.mir
new file mode 100644
index 0000000000000..ecc749382f1f7
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/block-order.mir
@@ -0,0 +1,529 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -o - %s -mtriple=thumbv7em-arm-none-eabihf -run-pass=block-placement -verify-machineinstrs | FileCheck %s
+# RUN: llc -o - %s -mtriple=thumbv7em-arm-none-eabihf -run-pass=block-placement -force-loop-cold-block -verify-machineinstrs | FileCheck %s
+---
+name: fn
+tracksRegLiveness: true
+jumpTable:
+ kind: inline
+ entries:
+ - id: 0
+ blocks: [ '%bb.7', '%bb.16', '%bb.25', '%bb.32' ]
+body: |
+ ; CHECK-LABEL: name: fn
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x5c0b8170), %bb.2(0x23f47e90)
+ ; CHECK-NEXT: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr
+ ; CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 3, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $r5 = tMOVr $r1, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r1 = nsw t2SUBri killed $r1, 1, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r7 = t2ASRri renamable $r1, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: $r0 = tMOVr $r3, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $r8 = tMOVr $r2, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r1 = t2ADDrs killed renamable $r1, killed renamable $r7, 235, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r7 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: $r9 = tMOVr killed $r3, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r4 = nuw nsw t2ADDrs killed renamable $r7, killed renamable $r1, 25, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r1 = nuw nsw t2MUL renamable $r4, killed $r2, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2CMPri renamable $r5, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.2, 10 /* CC::ge */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: $sp = frame-destroy tADDspi $sp, 3, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: liveins: $r1, $r4, $r5, $r8, $r9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r0 = t2ASRri renamable $r1, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: $lr = tMOVr killed $r5, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r10 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r2 = t2ADDrs renamable $r1, killed renamable $r0, 235, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r0 = nuw nsw t2LSLri killed renamable $r4, 3, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r2 = t2BICri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r1 = nsw t2MUL renamable $r0, killed renamable $r1, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x04000000), %bb.4(0x7c000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: t2CMPrr renamable $r10, renamable $lr, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.1, 0 /* CC::eq */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.7(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r7 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r4 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r1 = t2MUL renamable $r10, killed renamable $r1, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r2 = t2ASRri renamable $r1, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r3 = t2ADDrs renamable $r1, renamable $r2, 219, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r7 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.29:
+ ; CHECK-NEXT: successors: %bb.30(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.30:
+ ; CHECK-NEXT: successors: %bb.32(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.32:
+ ; CHECK-NEXT: successors: %bb.7(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r10, $r2, $r4, $r7, $r8, $r9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: t2CMPrr renamable $r4, renamable $r8, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.7, 11 /* CC::lt */, killed $cpsr
+ ; CHECK-NEXT: t2B %bb.3, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.7(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r2, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = t2MLA renamable $r1, renamable $r0, killed renamable $r7, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2CMPrr renamable $r4, renamable $r8, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.3, 10 /* CC::ge */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.8(0x50000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r2, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r3, renamable $r2 = t2LDR_POST killed renamable $r2, 4, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2CMPri renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.5, 0 /* CC::eq */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.32(0x19999998), %bb.9(0x66666668)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2RORri killed renamable $r1, 3, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: t2CMPri renamable $r1, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.32, 8 /* CC::hi */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: successors: %bb.10(0x20000000), %bb.17(0x20000000), %bb.24(0x20000000), %bb.30(0x20000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r12 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r5 = t2ADDrs killed renamable $r12, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: t2BR_JT killed renamable $r5, killed renamable $r1, %jump-table.0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.10:
+ ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.11(0x40000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $r1 = t2LSLri renamable $r3, 31, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.12, 0 /* CC::eq */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.11:
+ ; CHECK-NEXT: successors: %bb.12(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r5 = t2ASRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.12:
+ ; CHECK-NEXT: successors: %bb.32(0x30000000), %bb.13(0x50000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.32, 4 /* CC::mi */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.13:
+ ; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $r1 = t2LSLri renamable $r3, 30, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.15, 5 /* CC::pl */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.14:
+ ; CHECK-NEXT: successors: %bb.15(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r6 = t2LSRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.15:
+ ; CHECK-NEXT: successors: %bb.32(0x30000000), %bb.17(0x50000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.32, 4 /* CC::mi */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.17:
+ ; CHECK-NEXT: successors: %bb.19(0x40000000), %bb.18(0x40000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $r1 = t2LSLri renamable $r3, 23, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.19, 5 /* CC::pl */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.18:
+ ; CHECK-NEXT: successors: %bb.19(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r6 = t2ASRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.19:
+ ; CHECK-NEXT: successors: %bb.32(0x30000000), %bb.20(0x50000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.32, 4 /* CC::mi */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.20:
+ ; CHECK-NEXT: successors: %bb.22(0x40000000), %bb.21(0x40000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $r1 = t2LSLri renamable $r3, 22, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.22, 5 /* CC::pl */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.21:
+ ; CHECK-NEXT: successors: %bb.22(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r6 = t2LSRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.22:
+ ; CHECK-NEXT: successors: %bb.32(0x30000000), %bb.24(0x50000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.32, 4 /* CC::mi */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.24:
+ ; CHECK-NEXT: successors: %bb.26(0x40000000), %bb.25(0x40000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $r1 = t2LSLri renamable $r3, 15, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.26, 5 /* CC::pl */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.25:
+ ; CHECK-NEXT: successors: %bb.26(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r6 = t2ASRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.26:
+ ; CHECK-NEXT: successors: %bb.32(0x30000000), %bb.27(0x50000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.32, 4 /* CC::mi */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.27:
+ ; CHECK-NEXT: successors: %bb.29(0x40000000), %bb.28(0x40000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $r1 = t2LSLri renamable $r3, 14, 14 /* CC::al */, $noreg, def $cpsr
+ ; CHECK-NEXT: t2Bcc %bb.29, 5 /* CC::pl */, killed $cpsr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.28:
+ ; CHECK-NEXT: successors: %bb.29(0x80000000)
+ ; CHECK-NEXT: liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $r6 = t2LSRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: t2B %bb.29, 14 /* CC::al */, $noreg
+ bb.0:
+ successors: %bb.37(0x80000000), %bb.1(0x32000000)
+ liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr
+ $sp = frame-setup tSUBspi $sp, 3, 14 /* CC::al */, $noreg
+ $r5 = tMOVr $r1, 14 /* CC::al */, $noreg
+ renamable $r1 = nsw t2SUBri killed $r1, 1, 14 /* CC::al */, $noreg, $noreg
+ renamable $r7 = t2ASRri renamable $r1, 31, 14 /* CC::al */, $noreg, $noreg
+ $r0 = tMOVr $r3, 14 /* CC::al */, $noreg
+ $r8 = tMOVr $r2, 14 /* CC::al */, $noreg
+ renamable $r1 = t2ADDrs killed renamable $r1, killed renamable $r7, 235, 14 /* CC::al */, $noreg, $noreg
+ renamable $r7 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
+ $r9 = tMOVr killed $r3, 14 /* CC::al */, $noreg
+ renamable $r4 = nuw nsw t2ADDrs killed renamable $r7, killed renamable $r1, 25, 14 /* CC::al */, $noreg, $noreg
+ renamable $r1 = nuw nsw t2MUL renamable $r4, killed $r2, 14 /* CC::al */, $noreg
+ t2CMPri renamable $r5, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.37, 11 /* CC::lt */, killed $cpsr
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+ liveins: $r1, $r4, $r5, $r8, $r9
+
+ renamable $r0 = t2ASRri renamable $r1, 31, 14 /* CC::al */, $noreg, $noreg
+ $lr = tMOVr killed $r5, 14 /* CC::al */, $noreg
+ renamable $r10 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ renamable $r2 = t2ADDrs renamable $r1, killed renamable $r0, 235, 14 /* CC::al */, $noreg, $noreg
+ renamable $r0 = nuw nsw t2LSLri killed renamable $r4, 3, 14 /* CC::al */, $noreg, $noreg
+ renamable $r2 = t2BICri killed renamable $r2, 7, 14 /* CC::al */, $noreg, $noreg
+ renamable $r1 = nsw t2MUL renamable $r0, killed renamable $r1, 14 /* CC::al */, $noreg
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+ liveins: $lr, $r0, $r8, $r9, $r10
+
+ renamable $r1 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg
+ renamable $r7 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg
+ renamable $r4 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ renamable $r1 = t2MUL renamable $r10, killed renamable $r1, 14 /* CC::al */, $noreg
+ renamable $r2 = t2ASRri renamable $r1, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r3 = t2ADDrs renamable $r1, renamable $r2, 219, 14 /* CC::al */, $noreg, $noreg
+ renamable $r7 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg
+
+ bb.3:
+ successors: %bb.34(0x30000000), %bb.4(0x50000000)
+ liveins: $lr, $r0, $r1, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r3, renamable $r2 = t2LDR_POST killed renamable $r2, 4, 14 /* CC::al */, $noreg
+ t2CMPri renamable $r3, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.34, 0 /* CC::eq */, killed $cpsr
+
+ bb.4:
+ successors: %bb.5(0x19999998), %bb.6(0x66666668)
+ liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2RORri killed renamable $r1, 3, 14 /* CC::al */, $noreg, $noreg
+ t2CMPri renamable $r1, 3, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.6, 9 /* CC::ls */, killed $cpsr
+
+ bb.5:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.6:
+ successors: %bb.7(0x20000000), %bb.16(0x20000000), %bb.25(0x20000000), %bb.32(0x20000000)
+ liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r12 = t2LEApcrelJT %jump-table.0, 14 /* CC::al */, $noreg
+ renamable $r5 = t2ADDrs killed renamable $r12, renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
+ t2BR_JT killed renamable $r5, killed renamable $r1, %jump-table.0
+
+ bb.7:
+ successors: %bb.9(0x40000000), %bb.8(0x40000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ dead renamable $r1 = t2LSLri renamable $r3, 31, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.9, 0 /* CC::eq */, killed $cpsr
+
+ bb.8:
+ successors: %bb.9(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r5 = t2ASRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+
+ bb.9:
+ successors: %bb.10(0x30000000), %bb.11(0x50000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.11, 5 /* CC::pl */, killed $cpsr
+
+ bb.10:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.11:
+ successors: %bb.13(0x40000000), %bb.12(0x40000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ dead renamable $r1 = t2LSLri renamable $r3, 30, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.13, 5 /* CC::pl */, killed $cpsr
+
+ bb.12:
+ successors: %bb.13(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r6 = t2LSRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+
+ bb.13:
+ successors: %bb.14(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+
+ bb.14:
+ successors: %bb.15(0x30000000), %bb.16(0x50000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.16, 5 /* CC::pl */, killed $cpsr
+
+ bb.15:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.16:
+ successors: %bb.18(0x40000000), %bb.17(0x40000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ dead renamable $r1 = t2LSLri renamable $r3, 23, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.18, 5 /* CC::pl */, killed $cpsr
+
+ bb.17:
+ successors: %bb.18(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r6 = t2ASRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+
+ bb.18:
+ successors: %bb.19(0x30000000), %bb.20(0x50000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.20, 5 /* CC::pl */, killed $cpsr
+
+ bb.19:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.20:
+ successors: %bb.22(0x40000000), %bb.21(0x40000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ dead renamable $r1 = t2LSLri renamable $r3, 22, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.22, 5 /* CC::pl */, killed $cpsr
+
+ bb.21:
+ successors: %bb.22(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r6 = t2LSRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+
+ bb.22:
+ successors: %bb.23(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+
+ bb.23:
+ successors: %bb.24(0x30000000), %bb.25(0x50000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.25, 5 /* CC::pl */, killed $cpsr
+
+ bb.24:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.25:
+ successors: %bb.27(0x40000000), %bb.26(0x40000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ dead renamable $r1 = t2LSLri renamable $r3, 15, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.27, 5 /* CC::pl */, killed $cpsr
+
+ bb.26:
+ successors: %bb.27(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r6 = t2ASRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+
+ bb.27:
+ successors: %bb.28(0x30000000), %bb.29(0x50000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.29, 5 /* CC::pl */, killed $cpsr
+
+ bb.28:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.29:
+ successors: %bb.31(0x40000000), %bb.30(0x40000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ dead renamable $r1 = t2LSLri renamable $r3, 14, 14 /* CC::al */, $noreg, def $cpsr
+ t2Bcc %bb.31, 5 /* CC::pl */, killed $cpsr
+
+ bb.30:
+ successors: %bb.31(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r1 = t2ANDri renamable $r7, 31, 14 /* CC::al */, $noreg, $noreg
+ renamable $r6 = t2LSRri renamable $r7, 5, 14 /* CC::al */, $noreg, $noreg
+
+ bb.31:
+ successors: %bb.32(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+
+ bb.32:
+ successors: %bb.33(0x80000000)
+ liveins: $lr, $r0, $r2, $r3, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, def $cpsr
+
+ bb.33:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = nsw t2SUBrr killed renamable $r7, renamable $r0, 14 /* CC::al */, $noreg, $noreg
+ renamable $r1 = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
+ t2B %bb.35, 14 /* CC::al */, $noreg
+
+ bb.34:
+ successors: %bb.35(0x80000000)
+ liveins: $lr, $r0, $r1, $r2, $r4, $r7, $r8, $r9, $r10
+
+ renamable $r7 = t2MLA renamable $r1, renamable $r0, killed renamable $r7, 14 /* CC::al */, $noreg
+
+ bb.35:
+ successors: %bb.3(0x7c000000), %bb.36(0x04000000)
+ liveins: $lr, $r0, $r1, $r2, $r4, $r7, $r8, $r9, $r10
+
+ t2CMPrr renamable $r4, renamable $r8, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr
+
+ bb.36:
+ successors: %bb.37(0x04000000), %bb.2(0x7c000000)
+ liveins: $lr, $r0, $r8, $r9, $r10
+
+ t2CMPrr renamable $r10, renamable $lr, 14 /* CC::al */, $noreg, implicit-def $cpsr
+ t2Bcc %bb.2, 1 /* CC::ne */, killed $cpsr
+
+ bb.37:
+ $sp = frame-destroy tADDspi $sp, 3, 14 /* CC::al */, $noreg
+ $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc
+
+...
>From e3e17069901db0475266a09183739965e53bd174 Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Mon, 22 Jul 2024 12:19:58 +0100
Subject: [PATCH 2/3] Sort blocks by number in collectLoopBlockSet
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 32 ++++++++++++++--------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 0308ee22b1446..ea75313ac92ad 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2596,7 +2596,15 @@ void MachineBlockPlacement::rotateLoopWithProfile(
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
- BlockFilterSet LoopBlockSet;
+ // Collect the blocks in a set ordered by block number, as this gives the same
+ // order as they appear in the function.
+ struct MBBCompare {
+ bool operator()(const MachineBasicBlock *X,
+ const MachineBasicBlock *Y) const {
+ return X->getNumber() < Y->getNumber();
+ }
+ };
+ std::set<const MachineBasicBlock *, MBBCompare> LoopBlockSet;
// Filter cold blocks off from LoopBlockSet when profile data is available.
// Collect the sum of frequencies of incoming edges to the loop header from
@@ -2614,23 +2622,25 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
LoopFreq += MBFI->getBlockFreq(LoopPred) *
MBPI->getEdgeProbability(LoopPred, L.getHeader());
- for (auto &MBB : *F) {
- if (LoopBlockSet.count(&MBB) || !L.contains(&MBB))
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ if (LoopBlockSet.count(LoopBB))
continue;
- auto Freq = MBFI->getBlockFreq(&MBB).getFrequency();
+ auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
continue;
- BlockChain *Chain = BlockToChain[&MBB];
+ BlockChain *Chain = BlockToChain[LoopBB];
for (MachineBasicBlock *ChainBB : *Chain)
LoopBlockSet.insert(ChainBB);
}
- } else {
- for (auto &MBB : *F)
- if (L.contains(&MBB))
- LoopBlockSet.insert(&MBB);
- }
+ } else
+ LoopBlockSet.insert(L.block_begin(), L.block_end());
- return LoopBlockSet;
+ // Copy the blocks into a BlockFilterSet, as iterating it is faster than
+ // std::set. We will only remove blocks and never insert them, which will
+ // preserve the ordering.
+ BlockFilterSet Ret;
+ Ret.insert(LoopBlockSet.begin(), LoopBlockSet.end());
+ return Ret;
}
/// Forms basic block chains from the natural loop structures.
>From 791238984bd8f49ee2a7dc7cdca8eabfded947fa Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Tue, 23 Jul 2024 10:50:51 +0100
Subject: [PATCH 3/3] Fill BlockFilterSet when constructing
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index ea75313ac92ad..cb352a54ae222 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2638,8 +2638,7 @@ MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
// Copy the blocks into a BlockFilterSet, as iterating it is faster than
// std::set. We will only remove blocks and never insert them, which will
// preserve the ordering.
- BlockFilterSet Ret;
- Ret.insert(LoopBlockSet.begin(), LoopBlockSet.end());
+ BlockFilterSet Ret(LoopBlockSet.begin(), LoopBlockSet.end());
return Ret;
}
More information about the llvm-commits
mailing list