[lld] [lld][BP] Order .Tgm symbols for startup (PR #126328)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 7 16:32:03 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
Author: Ellis Hoag (ellishg)
<details>
<summary>Changes</summary>
The Global Function Merger (https://discourse.llvm.org/t/rfc-global-function-merging/82608) pass optimistically creates merged instances of functions and suffixes their names with `.Tgm`. Then in the linker, ICF will (hopefully) fold these `.Tgm` functions. For example, a function `foo` might become a thunk `foo` that calls a merged function `foo.Tgm`.
Since IRPGO runs before the global merger, we will only have a profile for `foo`. We want to correlate this profile to both `foo` and `foo.Tgm` so they can both be ordered to improve startup time.
I built a large binary and found that this change increased the number of functions ordered for startup, as expected.
```
Functions for startup: 12049 -> 12697
Functions for compression: 34733 -> 34707
```
We don't see a larger improvement because in some cases the code was already working by accident: `getRootSymbol("foo.llvm.5555.Tgm")` already returns `foo`.
---
Full diff: https://github.com/llvm/llvm-project/pull/126328.diff
4 Files Affected:
- (modified) lld/MachO/BPSectionOrderer.cpp (+2)
- (modified) lld/include/lld/Common/BPSectionOrdererBase.inc (+7-4)
- (modified) lld/test/ELF/bp-section-orderer.s (+56-10)
- (modified) lld/test/MachO/bp-section-orderer.s (+34-6)
``````````diff
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 689afd67712a417..f693e5e59f8faea 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -117,6 +117,8 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
auto *isec = subsec.isec;
if (!isec || isec->data.empty())
continue;
+ if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
+ continue;
size_t idx = sections.size();
sections.emplace_back(isec);
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index 83b87f4c7817db0..b19d3670d34ccdc 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -147,11 +147,14 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
return sectionUns;
}
-/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
-/// "yyyy" are numbers that could change between builds. We need to use the
-/// root symbol name before this suffix so these symbols can be matched with
-/// profiles which may have different suffixes.
+/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
+/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
+/// the global merge functions suffix
+/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
+/// name before this suffix so these symbols can be matched with profiles which
+/// may have different suffixes.
inline StringRef getRootSymbol(StringRef name) {
+ name.consume_back(".Tgm");
auto [P0, S0] = name.rsplit(".llvm.");
auto [P1, S1] = P0.rsplit(".__uniq.");
return P1;
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 2e18107c02ca306..6a61e7e8cef2b8d 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -1,3 +1,4 @@
+# NOTE: Code has been autogenerated by utils/update_test_body.py
# REQUIRES: aarch64
# RUN: rm -rf %t && split-file %s %t && cd %t
@@ -25,30 +26,33 @@
# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
# RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
-# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
+# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d3,d2,d1,{{$}}
# RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
# RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
-# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
+# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,merged1,merged2,_start,d3,d2,d4,d1,{{$}}
# RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
+# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
# RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
-# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
+# CFUNC: s5,s4,s3,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d3,d2,d1,{{$}}
# RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
# RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
-# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
+# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
# RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
-# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
+# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CBOTH
+# CBOTH: s5,s3,s4,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d1,d3,d2,{{$}}
# RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
-# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
+# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
-# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning
# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
-# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning
#--- a.proftext
:ir
@@ -125,6 +129,9 @@ int C(int a) { A(); return a + 2; }
int B(int a) { A(); return a + 1; }
void A() {}
+int merged1(int a) { return F(a + 101); }
+int merged2(int a) { return F(a + 101); }
+
int _start() { return 0; }
#--- gen
@@ -236,6 +243,44 @@ A: // @A
.Lfunc_end5:
.size A, .Lfunc_end5-A
// -- End function
+ .section .text.merged1,"ax",@progbits
+ .globl merged1 // -- Begin function merged1
+ .p2align 2
+ .type merged1,@function
+merged1: // @merged1
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #101
+ bl F
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end6:
+ .size merged1, .Lfunc_end6-merged1
+ // -- End function
+ .section .text.merged2,"ax",@progbits
+ .globl merged2 // -- Begin function merged2
+ .p2align 2
+ .type merged2,@function
+merged2: // @merged2
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #101
+ bl F
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end7:
+ .size merged2, .Lfunc_end7-merged2
+ // -- End function
.section .text._start,"ax",@progbits
.globl _start // -- Begin function _start
.p2align 2
@@ -244,8 +289,8 @@ _start: // @_start
// %bb.0: // %entry
mov w0, wzr
ret
-.Lfunc_end6:
- .size _start, .Lfunc_end6-_start
+.Lfunc_end8:
+ .size _start, .Lfunc_end8-_start
// -- End function
.type s5,@object // @s5
.section .rodata.s5,"a",@progbits
@@ -330,6 +375,7 @@ d1:
.section ".note.GNU-stack","",@progbits
.addrsig
+ .addrsig_sym F
.addrsig_sym C
.addrsig_sym B
.addrsig_sym A
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index 2eaff04bdc04700..136669e034c6a9a 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -5,11 +5,12 @@
# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
+# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP-ICF
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile %t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
-# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP
-# STARTUP: Ordered 3 sections using balanced partitioning
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP-ICF
+# STARTUP: Ordered 5 sections using balanced partitioning
+# STARTUP-ICF: Ordered 4 sections using balanced partitioning
# Check that orderfiles take precedence over BP
# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o - %t/a.o -order_file %t/a.orderfile --irpgo-profile-sort=%t/a.profdata | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
@@ -26,6 +27,8 @@
# ORDERFILE-DAG: _main
# ORDERFILE-DAG: _B
# ORDERFILE-DAG: l_C
+# ORDERFILE-DAG: merged1.Tgm
+# ORDERFILE-DAG: merged2.Tgm
# Data
# ORDERFILE: s3
@@ -42,13 +45,15 @@
# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ICF-FUNC
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# COMPRESSION-ICF-FUNC: Ordered 7 sections using balanced partitioning
# COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
#--- a.s
.text
@@ -78,6 +83,12 @@ F:
add w0, w0, #3
bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
ret
+merged1.Tgm:
+ add w0, w0, #101
+ ret
+merged2.Tgm:
+ add w0, w0, #101
+ ret
.data
s1:
@@ -106,7 +117,7 @@ r4:
1
# Weight
1
-A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666, merged1, merged2
A
# Func Hash:
@@ -140,6 +151,23 @@ D
# Counter Values:
1
+merged1
+# Func Hash:
+5555
+# Num Counters:
+1
+# Counter Values:
+1
+
+merged2
+# Func Hash:
+6666
+# Num Counters:
+1
+# Counter Values:
+1
+
+
#--- a.orderfile
A
F
``````````
</details>
https://github.com/llvm/llvm-project/pull/126328
More information about the llvm-commits
mailing list