[llvm] 994eb5a - [CodeGen] Fix unconditional branch duplication issue in bbsections
Daniel Hoekwater via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 24 09:28:35 PDT 2023
Author: Daniel Hoekwater
Date: 2023-08-24T16:22:55Z
New Revision: 994eb5adc40cd001d82d0f95d18d1827b57e496c
URL: https://github.com/llvm/llvm-project/commit/994eb5adc40cd001d82d0f95d18d1827b57e496c
DIFF: https://github.com/llvm/llvm-project/commit/994eb5adc40cd001d82d0f95d18d1827b57e496c.diff
LOG: [CodeGen] Fix unconditional branch duplication issue in bbsections
If a basic block that ends a section finishes with an unconditional branch
to its fallthrough, BasicBlockSections will duplicate that unconditional
branch. The duplicate doesn't break x86, but removing it is a (slight) size
optimization and, more importantly, prevents AArch64 builds from breaking.
Ex:
```
bb1 (bbsections Hot):
jmp bb2
bb2 (bbsections Cold):
/* do work... */
```
After running sortBasicBlocksAndUpdateBranches():
```
bb1 (bbsections Hot):
jmp bb2
jmp bb2
bb2 (bbsections Cold):
/* do work... */
```
Differential Revision: https://reviews.llvm.org/D158674
Added:
Modified:
llvm/lib/CodeGen/BasicBlockSections.cpp
llvm/test/CodeGen/Generic/machine-function-splitter.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 33e70b160d9212..de7c17082fa4bb 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -258,7 +258,8 @@ void llvm::sortBasicBlocksAndUpdateBranches(
[[maybe_unused]] const MachineBasicBlock *EntryBlock = &MF.front();
SmallVector<MachineBasicBlock *> PreLayoutFallThroughs(MF.getNumBlockIDs());
for (auto &MBB : MF)
- PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
+ PreLayoutFallThroughs[MBB.getNumber()] =
+ MBB.getFallThrough(/*JumpToFallThrough=*/false);
MF.sort(MBBCmp);
assert(&MF.front() == EntryBlock &&
diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
index 1dceee4305d624..322175becdbcc3 100644
--- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
@@ -6,11 +6,13 @@
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86
+; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -enable-split-machine-functions -mfs-allow-unsupported-triple -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-AARCH64
+; RUN: llc < %s -mtriple=aarch64 -enable-split-machine-functions -O0 -mfs-allow-unsupported-triple -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s --dump-input=always -check-prefixes=MFS-O0,MFS-O0-AARCH64
; COM: Machine function splitting with AFDO profiles
; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll
@@ -462,6 +464,29 @@ define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 {
ret void
}
+define void @foo16(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
+;; Check that an unconditional branch is only appended to a block
+;; if it would fall through to the wrong block otherwise.
+; MFS-O0-LABEL: foo16
+; MFS-O0-X86: jmp
+; MFS-O0-X86-NOT: jmp
+; MFS-O0-AARCH64: br
+; MFS-O0-AARCH64: br
+; MFS-O0-AARCH64-NOT: br
+; MFS-O0: .section .text.split.foo16
+; MFS-O0-NEXT: foo16.cold
+ %2 = call i32 @baz()
+ br i1 false, label %3, label %5, !prof !25
+
+3: ; preds = %1
+ %4 = call i32 @bar()
+ unreachable
+
+5: ; preds = %1
+ %6 = tail call i32 @qux()
+ ret void
+}
+
declare i32 @bar()
declare i32 @baz()
declare i32 @bam()
More information about the llvm-commits
mailing list