[lld] 79fff6a - [lld][BP] Avoid ordering ICF'ed sections (#126327)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 13 08:57:48 PST 2025
Author: Ellis Hoag
Date: 2025-02-13T08:57:44-08:00
New Revision: 79fff6aa324c560f4a32d5d2b0276744a2c49668
URL: https://github.com/llvm/llvm-project/commit/79fff6aa324c560f4a32d5d2b0276744a2c49668
DIFF: https://github.com/llvm/llvm-project/commit/79fff6aa324c560f4a32d5d2b0276744a2c49668.diff
LOG: [lld][BP] Avoid ordering ICF'ed sections (#126327)
ICF runs before BPSectionOrderer. When a section is ICF'ed, it seems
that the original sections are marked as not live, but are still kept
around. Prior to this patch, those ICF'ed sections would be passed to BP
and ordered before being skipped when writing the output. Now, these
sections are no longer passed to BP, saving runtime and possibly
improving BP's output.
In a large binary, I found that the number of sections ordered using BP
decreased and, as expected, the number of duplicate sections decreased
drastically.
```
Functions for startup: 50755 -> 50520
Functions for compression: 165734 -> 105328
Duplicate functions: 1827231 -> 55230
```
Added:
Modified:
lld/ELF/BPSectionOrderer.cpp
lld/MachO/BPSectionOrderer.cpp
lld/test/ELF/bp-section-orderer.s
lld/test/MachO/bp-section-orderer.s
Removed:
################################################################################
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 01f77b33926f7..4adb42ef4ff93 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -75,8 +75,11 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
auto *d = dyn_cast<Defined>(&sym);
if (!d)
return;
- auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
- if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
+ auto *sec = dyn_cast_or_null<InputSection>(d->section);
+ // Skip empty, discarded, ICF folded sections. Skipping ICF folded sections
+ // reduces duplicate detection work in BPSectionOrderer.
+ if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
+ !orderer.secToSym.try_emplace(sec, d).second)
return;
rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
.insert(sections.size());
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 689afd67712a4..950afd0421f06 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -117,6 +117,10 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
auto *isec = subsec.isec;
if (!isec || isec->data.empty())
continue;
+ // ConcatInputSections are entirely live or dead, so the offset is
+ // irrelevant.
+ if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
+ continue;
size_t idx = sections.size();
sections.emplace_back(isec);
for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 2e18107c02ca3..4889db63cd4d1 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -1,3 +1,4 @@
+# NOTE: Code has been autogenerated by utils/update_test_body.py
# REQUIRES: aarch64
# RUN: rm -rf %t && split-file %s %t && cd %t
@@ -18,37 +19,40 @@
# RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
# RUN: llvm-profdata merge a.proftext -o a.profdata
-# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
+# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
# STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
# RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
-# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
+# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d3,d2,d1,{{$}}
# RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
# RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
-# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
+# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,merged1,merged2,_start,d3,d2,d4,d1,{{$}}
# RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
+# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
# RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
-# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
+# CFUNC: s5,s4,s3,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d3,d2,d1,{{$}}
# RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
# RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
-# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
+# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
# RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
-# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
+# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CBOTH
+# CBOTH: s5,s3,s4,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d1,d3,d2,{{$}}
# RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
-# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
+# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
-# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning
# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
-# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning
#--- a.proftext
:ir
@@ -114,17 +118,24 @@ int d3[] = {5,6,7,8};
int d2[] = {7,8,9,10};
int d1[] = {3,4,5,6};
+// used is to suppress compiler garbage collection in ELF; retain is to suppress linker garbage collection; used is not needed for non-internal linkage symbols
+// used is for both compiler/linker GC in Mach-O; retain is ignored for Mach-O
+#define RETAIN [[gnu::used,gnu::retain]]
+
int C(int a);
int B(int a);
void A();
int F(int a) { return C(a + 3); }
-int E(int a) { return C(a + 2); }
-int D(int a) { return B(a + 2); }
+RETAIN int E(int a) { return C(a + 2); }
+RETAIN int D(int a) { return B(a + 2); }
int C(int a) { A(); return a + 2; }
int B(int a) { A(); return a + 1; }
void A() {}
+RETAIN int merged1(int a) { return F(a + 101); }
+int merged2(int a) { return F(a + 101); }
+
int _start() { return 0; }
#--- gen
@@ -169,7 +180,7 @@ C: // @C
.Lfunc_end1:
.size C, .Lfunc_end1-C
// -- End function
- .section .text.E,"ax",@progbits
+ .section .text.E,"axR",@progbits
.globl E // -- Begin function E
.p2align 2
.type E,@function
@@ -188,7 +199,7 @@ E: // @E
.Lfunc_end2:
.size E, .Lfunc_end2-E
// -- End function
- .section .text.D,"ax",@progbits
+ .section .text.D,"axR",@progbits
.globl D // -- Begin function D
.p2align 2
.type D,@function
@@ -236,6 +247,44 @@ A: // @A
.Lfunc_end5:
.size A, .Lfunc_end5-A
// -- End function
+ .section .text.merged1,"axR",@progbits
+ .globl merged1 // -- Begin function merged1
+ .p2align 2
+ .type merged1,@function
+merged1: // @merged1
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #101
+ bl F
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end6:
+ .size merged1, .Lfunc_end6-merged1
+ // -- End function
+ .section .text.merged2,"ax",@progbits
+ .globl merged2 // -- Begin function merged2
+ .p2align 2
+ .type merged2,@function
+merged2: // @merged2
+// %bb.0: // %entry
+ sub sp, sp, #32
+ stp x29, x30, [sp, #16] // 16-byte Folded Spill
+ add x29, sp, #16
+ stur w0, [x29, #-4]
+ ldur w8, [x29, #-4]
+ add w0, w8, #101
+ bl F
+ ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+ add sp, sp, #32
+ ret
+.Lfunc_end7:
+ .size merged2, .Lfunc_end7-merged2
+ // -- End function
.section .text._start,"ax",@progbits
.globl _start // -- Begin function _start
.p2align 2
@@ -244,8 +293,8 @@ _start: // @_start
// %bb.0: // %entry
mov w0, wzr
ret
-.Lfunc_end6:
- .size _start, .Lfunc_end6-_start
+.Lfunc_end8:
+ .size _start, .Lfunc_end8-_start
// -- End function
.type s5,@object // @s5
.section .rodata.s5,"a",@progbits
@@ -330,6 +379,10 @@ d1:
.section ".note.GNU-stack","",@progbits
.addrsig
+ .addrsig_sym F
.addrsig_sym C
+ .addrsig_sym E
+ .addrsig_sym D
.addrsig_sym B
.addrsig_sym A
+ .addrsig_sym merged1
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index 2eaff04bdc047..f0ac1bd99f722 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -42,13 +42,15 @@
# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ICF-FUNC
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# COMPRESSION-ICF-FUNC: Ordered 7 sections using balanced partitioning
# COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
#--- a.s
.text
@@ -78,6 +80,12 @@ F:
add w0, w0, #3
bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
ret
+merged1:
+ add w0, w0, #101
+ ret
+merged2:
+ add w0, w0, #101
+ ret
.data
s1:
More information about the llvm-commits
mailing list