[lld] 79fff6a - [lld][BP] Avoid ordering ICF'ed sections (#126327)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 13 08:57:48 PST 2025


Author: Ellis Hoag
Date: 2025-02-13T08:57:44-08:00
New Revision: 79fff6aa324c560f4a32d5d2b0276744a2c49668

URL: https://github.com/llvm/llvm-project/commit/79fff6aa324c560f4a32d5d2b0276744a2c49668
DIFF: https://github.com/llvm/llvm-project/commit/79fff6aa324c560f4a32d5d2b0276744a2c49668.diff

LOG: [lld][BP] Avoid ordering ICF'ed sections (#126327)

ICF runs before BPSectionOrderer. When a section is folded by ICF, it
seems that the folded section is marked as not live, but is still kept
around. Prior to this patch, those ICF'ed sections would be passed to BP
and ordered, only to be skipped when writing the output. Now, these
sections are no longer passed to BP, saving runtime and possibly
improving BP's output.

In a large binary, I found that the number of sections ordered using BP
decreased, and the number of duplicate sections decreased drastically,
as expected.
```
Functions for startup: 50755 -> 50520
Functions for compression: 165734 -> 105328
Duplicate functions: 1827231 -> 55230
```

Added: 
    

Modified: 
    lld/ELF/BPSectionOrderer.cpp
    lld/MachO/BPSectionOrderer.cpp
    lld/test/ELF/bp-section-orderer.s
    lld/test/MachO/bp-section-orderer.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 01f77b33926f7..4adb42ef4ff93 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -75,8 +75,11 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
     auto *d = dyn_cast<Defined>(&sym);
     if (!d)
       return;
-    auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
-    if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
+    auto *sec = dyn_cast_or_null<InputSection>(d->section);
+    // Skip empty, discarded, ICF folded sections. Skipping ICF folded sections
+    // reduces duplicate detection work in BPSectionOrderer.
+    if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
+        !orderer.secToSym.try_emplace(sec, d).second)
       return;
     rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
         .insert(sections.size());

diff  --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 689afd67712a4..950afd0421f06 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -117,6 +117,10 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty())
           continue;
+        // ConcatInputSections are entirely live or dead, so the offset is
+        // irrelevant.
+        if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
+          continue;
         size_t idx = sections.size();
         sections.emplace_back(isec);
         for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {

diff  --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 2e18107c02ca3..4889db63cd4d1 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -1,3 +1,4 @@
+# NOTE: Code has been autogenerated by utils/update_test_body.py
 # REQUIRES: aarch64
 # RUN: rm -rf %t && split-file %s %t && cd %t
 
@@ -18,37 +19,40 @@
 
 # RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
 # RUN: llvm-profdata merge a.proftext -o a.profdata
-# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
+# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
 
 # STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
 # STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
 
 # RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
 # RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
-# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
+# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d3,d2,d1,{{$}}
 
 # RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
 # RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
-# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
+# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,merged1,merged2,_start,d3,d2,d4,d1,{{$}}
 
 # RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
+# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
 # RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
-# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
+# CFUNC: s5,s4,s3,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d3,d2,d1,{{$}}
 
 # RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
 # RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
-# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
+# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
 
 # RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
-# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
+# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CBOTH
+# CBOTH: s5,s3,s4,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d1,d3,d2,{{$}}
 
 # RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
 # RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
-# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
+# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
 
-# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning
 # BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
-# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning
 
 #--- a.proftext
 :ir
@@ -114,17 +118,24 @@ int d3[] = {5,6,7,8};
 int d2[] = {7,8,9,10};
 int d1[] = {3,4,5,6};
 
+// used is to suppress compiler garbage collection in ELF; retain is to suppress linker garbage collection; used is not needed for non-internal linkage symbols
+// used is for both compiler/linker GC in Mach-O; retain is ignored for Mach-O
+#define RETAIN [[gnu::used,gnu::retain]]
+
 int C(int a);
 int B(int a);
 void A();
 
 int F(int a) { return C(a + 3); }
-int E(int a) { return C(a + 2); }
-int D(int a) { return B(a + 2); }
+RETAIN int E(int a) { return C(a + 2); }
+RETAIN int D(int a) { return B(a + 2); }
 int C(int a) { A(); return a + 2; }
 int B(int a) { A(); return a + 1; }
 void A() {}
 
+RETAIN int merged1(int a) { return F(a + 101); }
+int merged2(int a) { return F(a + 101); }
+
 int _start() { return 0; }
 
 #--- gen
@@ -169,7 +180,7 @@ C:                                      // @C
 .Lfunc_end1:
 	.size	C, .Lfunc_end1-C
                                         // -- End function
-	.section	.text.E,"ax",@progbits
+	.section	.text.E,"axR",@progbits
 	.globl	E                               // -- Begin function E
 	.p2align	2
 	.type	E,@function
@@ -188,7 +199,7 @@ E:                                      // @E
 .Lfunc_end2:
 	.size	E, .Lfunc_end2-E
                                         // -- End function
-	.section	.text.D,"ax",@progbits
+	.section	.text.D,"axR",@progbits
 	.globl	D                               // -- Begin function D
 	.p2align	2
 	.type	D,@function
@@ -236,6 +247,44 @@ A:                                      // @A
 .Lfunc_end5:
 	.size	A, .Lfunc_end5-A
                                         // -- End function
+	.section	.text.merged1,"axR",@progbits
+	.globl	merged1                         // -- Begin function merged1
+	.p2align	2
+	.type	merged1,@function
+merged1:                                // @merged1
+// %bb.0:                               // %entry
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #101
+	bl	F
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end6:
+	.size	merged1, .Lfunc_end6-merged1
+                                        // -- End function
+	.section	.text.merged2,"ax",@progbits
+	.globl	merged2                         // -- Begin function merged2
+	.p2align	2
+	.type	merged2,@function
+merged2:                                // @merged2
+// %bb.0:                               // %entry
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #101
+	bl	F
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end7:
+	.size	merged2, .Lfunc_end7-merged2
+                                        // -- End function
 	.section	.text._start,"ax",@progbits
 	.globl	_start                          // -- Begin function _start
 	.p2align	2
@@ -244,8 +293,8 @@ _start:                                 // @_start
 // %bb.0:                               // %entry
 	mov	w0, wzr
 	ret
-.Lfunc_end6:
-	.size	_start, .Lfunc_end6-_start
+.Lfunc_end8:
+	.size	_start, .Lfunc_end8-_start
                                         // -- End function
 	.type	s5,@object                      // @s5
 	.section	.rodata.s5,"a",@progbits
@@ -330,6 +379,10 @@ d1:
 
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
+	.addrsig_sym F
 	.addrsig_sym C
+	.addrsig_sym E
+	.addrsig_sym D
 	.addrsig_sym B
 	.addrsig_sym A
+	.addrsig_sym merged1

diff  --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index 2eaff04bdc047..f0ac1bd99f722 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -42,13 +42,15 @@
 # RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ICF-FUNC
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 
-# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# COMPRESSION-ICF-FUNC: Ordered 7 sections using balanced partitioning
 # COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
 
 #--- a.s
 .text
@@ -78,6 +80,12 @@ F:
   add w0, w0, #3
   bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
   ret
+merged1:
+  add w0, w0, #101
+  ret
+merged2:
+  add w0, w0, #101
+  ret
 
 .data
 s1:


        


More information about the llvm-commits mailing list