[llvm] e223e45 - Reland "[AArch64][CodeGen] Avoid inverting hot branches during relaxation"
Daniel Hoekwater via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 21 10:30:03 PDT 2023
Author: Daniel Hoekwater
Date: 2023-08-21T17:29:47Z
New Revision: e223e4567722661f1b32aa052cd13d9f47b896d9
URL: https://github.com/llvm/llvm-project/commit/e223e4567722661f1b32aa052cd13d9f47b896d9
DIFF: https://github.com/llvm/llvm-project/commit/e223e4567722661f1b32aa052cd13d9f47b896d9.diff
LOG: Reland "[AArch64][CodeGen] Avoid inverting hot branches during relaxation"
This is a reland of 46d2d7599d9ed5e68fb53e910feb10d47ee2667b, which was
reverted because it appeared to break the build at
https://lab.llvm.org/buildbot/#/builders/21/builds/78779. However, that
buildbot failure is spurious: it is caused by Flang::underscoring.f90 being
nondeterministic.
Added:
Modified:
llvm/lib/CodeGen/BranchRelaxation.cpp
llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 378f8bfda20361..2995732e0aa86b 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -79,6 +79,10 @@ class BranchRelaxation : public MachineFunctionPass {
};
SmallVector<BasicBlockInfo, 16> BlockInfo;
+
+ // The basic block after which trampolines are inserted. This is the last
+ // basic block that isn't in the cold section.
+ MachineBasicBlock *TrampolineInsertionPoint = nullptr;
std::unique_ptr<RegScavenger> RS;
LivePhysRegs LiveRegs;
@@ -166,16 +170,27 @@ LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
void BranchRelaxation::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
+ TrampolineInsertionPoint = nullptr;
// First thing, compute the size of all basic blocks, and see if the function
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
- // instructions in the inline assembly.
- for (MachineBasicBlock &MBB : *MF)
+ // instructions in the inline assembly. At the same time, place the
+ // trampoline insertion point at the end of the hot portion of the function.
+ for (MachineBasicBlock &MBB : *MF) {
BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
+ if (MBB.getSectionID() != MBBSectionID::ColdSectionID)
+ TrampolineInsertionPoint = &MBB;
+ }
+
// Compute block offsets and known bits.
adjustBlockOffsets(*MF->begin());
+
+ if (TrampolineInsertionPoint == nullptr) {
+ LLVM_DEBUG(dbgs() << " No suitable trampoline insertion point found in "
+ << MF->getName() << ".\n");
+ }
}
/// computeBlockSize - Compute the size for MBB.
@@ -376,6 +391,50 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
assert(!Fail && "branches to be relaxed must be analyzable");
(void)Fail;
+ // Since cross-section conditional branches to the cold section are rarely
+ // taken, try to avoid inverting the condition. Instead, add a "trampoline
+ // branch", which unconditionally branches to the branch destination. Place
+ // the trampoline branch after the last basic block that isn't in the cold
+ // section and retarget the conditional branch to the trampoline.
+ // tbz L1
+ // =>
+ // tbz L1Trampoline
+ // ...
+ // L1Trampoline: b L1
+ if (MBB->getSectionID() != TBB->getSectionID() &&
+ TBB->getSectionID() == MBBSectionID::ColdSectionID &&
+ TrampolineInsertionPoint != nullptr) {
+ // If the insertion point is out of range, we can't put a trampoline there.
+ NewBB =
+ createNewBlockAfter(*TrampolineInsertionPoint, MBB->getBasicBlock());
+
+ if (isBlockInRange(MI, *NewBB)) {
+ LLVM_DEBUG(dbgs() << " Retarget destination to trampoline at "
+ << NewBB->back());
+
+ insertUncondBranch(NewBB, TBB);
+
+ // Update the successor lists to include the trampoline.
+ MBB->replaceSuccessor(TBB, NewBB);
+ NewBB->addSuccessor(TBB);
+
+ // Replace branch in the current (MBB) block.
+ removeBranch(MBB);
+ insertBranch(MBB, NewBB, FBB, Cond);
+
+ TrampolineInsertionPoint = NewBB;
+ finalizeBlockChanges(MBB, NewBB);
+ return true;
+ }
+
+ LLVM_DEBUG(
+ dbgs() << " Trampoline insertion point out of range for Bcc from "
+ << printMBBReference(*MBB) << " to " << printMBBReference(*TBB)
+ << ".\n");
+ TrampolineInsertionPoint->setIsEndSection(NewBB->isEndSection());
+ MF->erase(NewBB);
+ }
+
// Add an unconditional branch to the destination and invert the branch
// condition to jump over it:
// tbz L1
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
index 1cf307cd16ecff..231bc886dd3b74 100644
--- a/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
+++ b/llvm/test/CodeGen/AArch64/branch-relax-cross-section.mir
@@ -1,4 +1,5 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-b-offset-bits=64 -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass branch-relaxation -aarch64-tbz-offset-bits=9 -aarch64-cbz-offset-bits=9 %s -o - | FileCheck --check-prefix=INDIRECT %s
--- |
declare i32 @bar()
@@ -21,6 +22,73 @@
br label %end
}
+ define void @tbz_hot_to_cold(i1 zeroext %0) {
+ br i1 %0, label %hot_block, label %cold_block
+
+ hot_block: ; preds = %1
+ %2 = call i32 @baz()
+ br label %end
+
+ end: ; preds = %cold_block, %hot_block
+ %3 = tail call i32 @qux()
+ ret void
+
+ cold_block: ; preds = %1
+ %4 = call i32 @bar()
+ br label %end
+ }
+
+ define void @tbz_no_valid_tramp(i1 zeroext %0) {
+ br i1 %0, label %hot, label %cold
+
+ hot: ; preds = %1
+ %2 = call i32 @baz()
+ call void asm sideeffect ".space 1024", ""()
+ br label %end
+
+ end: ; preds = %cold, %hot
+ %3 = tail call i32 @qux()
+ ret void
+
+ cold: ; preds = %1
+ %4 = call i32 @bar()
+ br label %end
+ }
+
+ define void @tbz_cold_to_hot(i1 zeroext %0) #0 {
+ br i1 %0, label %cold_block, label %hot_block
+
+ cold_block: ; preds = %1
+ %2 = call i32 @baz()
+ br label %end
+
+ end: ; preds = %hot_block, %cold_block
+ %3 = tail call i32 @qux()
+ ret void
+
+ hot_block: ; preds = %1
+ %4 = call i32 @bar()
+ br label %end
+ }
+
+ define void @tbz_tramp_pushed_oob(i1 zeroext %0, i1 zeroext %1) {
+ entry:
+ %x16 = call i64 asm sideeffect "mov x16, 1", "={x16}"()
+ br i1 %0, label %unrelaxable, label %cold
+
+ unrelaxable: ; preds = %entry
+ br i1 %1, label %end, label %cold
+
+ end: ; preds = %unrelaxable
+ call void asm sideeffect ".space 996", ""()
+ call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16)
+ ret void
+
+ cold: ; preds = %entry, %unrelaxable
+ call void asm sideeffect "# reg use $0", "{x16}"(i64 %x16)
+ ret void
+ }
+
...
---
name: relax_tbz
@@ -69,3 +137,201 @@ body: |
early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
...
+---
+name: tbz_hot_to_cold
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+body: |
+ ; CHECK-LABEL: name: tbz_hot_to_cold
+ ; COM: Check that branch relaxation relaxes cross-section conditional
+ ; COM: branches by creating trampolines after all other hot basic blocks.
+ ; CHECK: bb.0 (%ir-block.1):
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK: TBZW
+ ; CHECK-SAME: %bb.3
+ ; CHECK: bb.1.hot_block:
+ ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+ ; CHECK: bb.3 (%ir-block.1):
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.cold_block (bbsections Cold):
+ ; CHECK: TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+ bb.0 (%ir-block.1):
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $w0, $lr
+
+ early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+ TBZW killed renamable $w0, 0, %bb.2
+
+ bb.1.hot_block:
+ BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+ bb.2.cold_block (bbsections Cold):
+ BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name: tbz_no_valid_tramp
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ ; CHECK-LABEL: name: tbz_no_valid_tramp
+ ; COM: Check that branch relaxation doesn't insert a trampoline if there is no
+ ; COM: viable insertion location.
+ ; CHECK: bb.0 (%ir-block.1):
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK: CBNZW
+ ; CHECK-SAME: %bb.1
+ ; CHECK-NEXT: B
+ ; CHECK-SAME: %bb.3
+ ; CHECK: bb.1.hot:
+ ; CHECK: TCRETURNdi
+ ; CHECK: bb.2.cold (bbsections Cold):
+ ; CHECK: TCRETURNdi
+ bb.0 (%ir-block.1):
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $w0, $lr
+
+ early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+ CBZW killed renamable $w0, %bb.2
+
+ bb.1.hot:
+ BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ INLINEASM &".space 1024", 1 /* sideeffect attdialect */
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+ bb.2.cold (bbsections Cold):
+ BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name: tbz_cold_to_hot
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ ; CHECK-LABEL: name: tbz_cold_to_hot
+ ; COM: Check that relaxation of conditional branches from the Cold section to
+ ; COM: the Hot section doesn't modify the Hot section.
+ ; CHECK: bb.0 (%ir-block.1, bbsections Cold):
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: CBNZW
+ ; CHECK-SAME: %bb.1
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK: bb.1.cold_block (bbsections Cold):
+ ; CHECK: TCRETURNdi
+ ; CHECK: bb.2.hot_block:
+ ; CHECK: TCRETURNdi
+ bb.0 (%ir-block.1, bbsections Cold):
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $w0, $lr
+
+ early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+ CBZW killed renamable $w0, %bb.2
+
+ bb.1.cold_block (bbsections Cold):
+ BL @baz, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+ bb.2.hot_block:
+ BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
+---
+name: tbz_tramp_pushed_oob
+tracksRegLiveness: true
+liveins:
+ - { reg: '$w0', virtual-reg: '' }
+ - { reg: '$w1', virtual-reg: '' }
+stack:
+ - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+ stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ hasRedZone: false
+body: |
+ ; INDIRECT-LABEL: name: tbz_tramp_pushed_oob
+ ; COM: Check that a conditional branch to a trampoline is properly relaxed
+ ; COM: if the trampoline is pushed out of range.
+ ; INDIRECT: bb.0.entry:
+ ; INDIRECT-NEXT: successors: %bb.1(0x40000000), %[[TRAMP1:bb.[0-9]+]](0x40000000)
+ ; INDIRECT: TBNZW
+ ; INDIRECT-SAME: %bb.1
+ ; INDIRECT-NEXT: B{{ }}
+ ; INDIRECT-SAME: %[[TRAMP1]]
+ ; INDIRECT: bb.1.unrelaxable:
+ ; INDIRECT-NEXT: successors: %bb.2(0x40000000), %[[TRAMP2:bb.[0-9]+]](0x40000000)
+ ; INDIRECT: TBNZW
+ ; INDIRECT-SAME: %bb.2
+ ; INDIRECT: [[TRAMP2]]
+ ; INDIRECT-NEXT: successors: %bb.3(0x80000000)
+ ; INDIRECT: bb.2.end:
+ ; INDIRECT: TCRETURNdi
+ ; INDIRECT: [[TRAMP1]].entry:
+ ; INDIRECT: successors: %bb.3(0x80000000)
+ ; INDIRECT-NOT: bbsections Cold
+ ; INDIRECT: bb.3.cold (bbsections Cold):
+ ; INDIRECT: TCRETURNdi
+
+ bb.0.entry (%ir-block.entry):
+ successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ liveins: $w0, $w1, $lr
+
+ early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0)
+ INLINEASM &"mov x16, 1", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $x16
+ TBZW killed renamable $w0, 0, %bb.3
+
+ bb.1.unrelaxable:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ liveins: $w1, $x16
+
+ TBNZW killed renamable $w1, 0, %bb.2
+
+ B %bb.3
+
+ bb.2.end:
+ liveins: $x16
+
+ INLINEASM &".space 996", 1 /* sideeffect attdialect */
+ INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+ bb.3.cold (bbsections Cold):
+ liveins: $x16
+
+ INLINEASM &"# reg use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $x16
+ early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0)
+ TCRETURNdi @qux, 0, csr_aarch64_aapcs, implicit $sp
+
+...
More information about the llvm-commits
mailing list