[lld] [lld][BP] Avoid ordering ICF'ed sections (PR #126327)

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 09:22:17 PST 2025


https://github.com/ellishg updated https://github.com/llvm/llvm-project/pull/126327

>From ca93859385faf9cdce53d5cd2208a1259128e6c0 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Fri, 7 Feb 2025 16:14:57 -0800
Subject: [PATCH 1/2] [lld][BP] Avoid ordering ICF'ed sections

---
 lld/MachO/BPSectionOrderer.cpp      |  2 +
 lld/test/ELF/bp-section-orderer.s   | 66 ++++++++++++++++++++++++-----
 lld/test/MachO/bp-section-orderer.s | 12 +++++-
 3 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 689afd67712a417..f693e5e59f8faea 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -117,6 +117,8 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty())
           continue;
+        if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
+          continue;
         size_t idx = sections.size();
         sections.emplace_back(isec);
         for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 2e18107c02ca306..6a61e7e8cef2b8d 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -1,3 +1,4 @@
+# NOTE: Code has been autogenerated by utils/update_test_body.py
 # REQUIRES: aarch64
 # RUN: rm -rf %t && split-file %s %t && cd %t
 
@@ -25,30 +26,33 @@
 
 # RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
 # RUN: llvm-nm -jn out.s | tr '\n' , | FileCheck %s --check-prefix=STARTUP
-# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,_start,d4,d3,d2,d1,{{$}}
+# STARTUP: s5,s4,s3,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d3,d2,d1,{{$}}
 
 # RUN: ld.lld -o out.os a.o --irpgo-profile=a.profdata --bp-startup-sort=function --symbol-ordering-file a.txt
 # RUN: llvm-nm -jn out.os | tr '\n' , | FileCheck %s --check-prefix=ORDER-STARTUP
-# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,_start,d3,d2,d4,d1,{{$}}
+# ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,merged1,merged2,_start,d3,d2,d4,d1,{{$}}
 
 # RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
+# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
 # RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
-# CFUNC: s5,s4,s3,s2,s1,F,C,E,D,B,A,_start,d4,d3,d2,d1,{{$}}
+# CFUNC: s5,s4,s3,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d3,d2,d1,{{$}}
 
 # RUN: ld.lld -o out.cd a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-DATA
 # RUN: llvm-nm -jn out.cd | tr '\n' , | FileCheck %s --check-prefix=CDATA
-# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,_start,d4,d1,d3,d2,{{$}}
+# CDATA: s5,s3,s4,s2,s1,F,C,E,D,B,A,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
 
 # RUN: ld.lld -o out.cb a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
-# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CDATA
+# RUN: llvm-nm -jn out.cb | tr '\n' , | FileCheck %s --check-prefix=CBOTH
+# CBOTH: s5,s3,s4,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d1,d3,d2,{{$}}
 
 # RUN: ld.lld -o out.cbs a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-BOTH
 # RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
-# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,_start,d4,d1,d3,d2,{{$}}
+# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
 
-# BP-COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# BP-COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning
 # BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
-# BP-COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning
 
 #--- a.proftext
 :ir
@@ -125,6 +129,9 @@ int C(int a) { A(); return a + 2; }
 int B(int a) { A(); return a + 1; }
 void A() {}
 
+int merged1(int a) { return F(a + 101); }
+int merged2(int a) { return F(a + 101); }
+
 int _start() { return 0; }
 
 #--- gen
@@ -236,6 +243,44 @@ A:                                      // @A
 .Lfunc_end5:
 	.size	A, .Lfunc_end5-A
                                         // -- End function
+	.section	.text.merged1,"ax",@progbits
+	.globl	merged1                         // -- Begin function merged1
+	.p2align	2
+	.type	merged1,@function
+merged1:                                // @merged1
+// %bb.0:                               // %entry
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #101
+	bl	F
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end6:
+	.size	merged1, .Lfunc_end6-merged1
+                                        // -- End function
+	.section	.text.merged2,"ax",@progbits
+	.globl	merged2                         // -- Begin function merged2
+	.p2align	2
+	.type	merged2,@function
+merged2:                                // @merged2
+// %bb.0:                               // %entry
+	sub	sp, sp, #32
+	stp	x29, x30, [sp, #16]             // 16-byte Folded Spill
+	add	x29, sp, #16
+	stur	w0, [x29, #-4]
+	ldur	w8, [x29, #-4]
+	add	w0, w8, #101
+	bl	F
+	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	add	sp, sp, #32
+	ret
+.Lfunc_end7:
+	.size	merged2, .Lfunc_end7-merged2
+                                        // -- End function
 	.section	.text._start,"ax",@progbits
 	.globl	_start                          // -- Begin function _start
 	.p2align	2
@@ -244,8 +289,8 @@ _start:                                 // @_start
 // %bb.0:                               // %entry
 	mov	w0, wzr
 	ret
-.Lfunc_end6:
-	.size	_start, .Lfunc_end6-_start
+.Lfunc_end8:
+	.size	_start, .Lfunc_end8-_start
                                         // -- End function
 	.type	s5,@object                       // @s5
 	.section	.rodata.s5,"a",@progbits
@@ -330,6 +375,7 @@ d1:
 
 	.section	".note.GNU-stack","",@progbits
 	.addrsig
+	.addrsig_sym F
 	.addrsig_sym C
 	.addrsig_sym B
 	.addrsig_sym A
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index 2eaff04bdc04700..f0ac1bd99f7227a 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -42,13 +42,15 @@
 # RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ICF-FUNC
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
 
-# COMPRESSION-FUNC: Ordered 7 sections using balanced partitioning
+# COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
+# COMPRESSION-ICF-FUNC: Ordered 7 sections using balanced partitioning
 # COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 14 sections using balanced partitioning
+# COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
 
 #--- a.s
 .text
@@ -78,6 +80,12 @@ F:
   add w0, w0, #3
   bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
   ret
+merged1:
+  add w0, w0, #101
+  ret
+merged2:
+  add w0, w0, #101
+  ret
 
 .data
 s1:

>From b65e7a132fb4416e22962ac41c887160ece7cb2f Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellishoag at meta.com>
Date: Mon, 10 Feb 2025 09:22:02 -0800
Subject: [PATCH 2/2] Skip non-live sections for ELF

---
 lld/ELF/BPSectionOrderer.cpp      |  7 +++++--
 lld/MachO/BPSectionOrderer.cpp    |  2 ++
 lld/test/ELF/bp-section-orderer.s | 23 +++++++++++++++--------
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 01f77b33926f755..4adb42ef4ff9365 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -75,8 +75,11 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
     auto *d = dyn_cast<Defined>(&sym);
     if (!d)
       return;
-    auto *sec = dyn_cast_or_null<InputSectionBase>(d->section);
-    if (!sec || sec->size == 0 || !orderer.secToSym.try_emplace(sec, d).second)
+    auto *sec = dyn_cast_or_null<InputSection>(d->section);
+    // Skip empty, discarded, ICF folded sections. Skipping ICF folded sections
+    // reduces duplicate detection work in BPSectionOrderer.
+    if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
+        !orderer.secToSym.try_emplace(sec, d).second)
       return;
     rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
         .insert(sections.size());
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index f693e5e59f8faea..950afd0421f0620 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -117,6 +117,8 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
         auto *isec = subsec.isec;
         if (!isec || isec->data.empty())
           continue;
+        // ConcatInputSections are entirely live or dead, so the offset is
+        // irrelevant.
         if (isa<ConcatInputSection>(isec) && !isec->isLive(0))
           continue;
         size_t idx = sections.size();
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 6a61e7e8cef2b8d..4889db63cd4d18d 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -19,7 +19,7 @@
 
 # RUN: llvm-mc -filetype=obj -triple=aarch64 a.s -o a.o
 # RUN: llvm-profdata merge a.proftext -o a.profdata
-# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
+# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
 
 # STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
 # STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
@@ -33,7 +33,7 @@
 # ORDER-STARTUP: s2,s1,s5,s4,s3,A,F,E,D,B,C,merged1,merged2,_start,d3,d2,d4,d1,{{$}}
 
 # RUN: ld.lld -o out.cf a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-FUNC
-# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
+# RUN: ld.lld -o out.cf.icf a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=BP-COMPRESSION-ICF-FUNC
 # RUN: llvm-nm -jn out.cf | tr '\n' , | FileCheck %s --check-prefix=CFUNC
 # CFUNC: s5,s4,s3,s2,s1,A,F,merged1,merged2,C,E,D,B,_start,d4,d3,d2,d1,{{$}}
 
@@ -118,18 +118,22 @@ int d3[] = {5,6,7,8};
 int d2[] = {7,8,9,10};
 int d1[] = {3,4,5,6};
 
+// ELF: `used` keeps the compiler from discarding the symbol and `retain` keeps the linker from garbage-collecting it; `used` is not needed for symbols with non-internal linkage.
+// Mach-O: `used` covers both compiler and linker GC; `retain` is ignored.
+#define RETAIN [[gnu::used,gnu::retain]]
+
 int C(int a);
 int B(int a);
 void A();
 
 int F(int a) { return C(a + 3); }
-int E(int a) { return C(a + 2); }
-int D(int a) { return B(a + 2); }
+RETAIN int E(int a) { return C(a + 2); }
+RETAIN int D(int a) { return B(a + 2); }
 int C(int a) { A(); return a + 2; }
 int B(int a) { A(); return a + 1; }
 void A() {}
 
-int merged1(int a) { return F(a + 101); }
+RETAIN int merged1(int a) { return F(a + 101); }
 int merged2(int a) { return F(a + 101); }
 
 int _start() { return 0; }
@@ -176,7 +180,7 @@ C:                                      // @C
 .Lfunc_end1:
 	.size	C, .Lfunc_end1-C
                                         // -- End function
-	.section	.text.E,"ax",@progbits
+	.section	.text.E,"axR",@progbits
 	.globl	E                               // -- Begin function E
 	.p2align	2
 	.type	E,@function
@@ -195,7 +199,7 @@ E:                                      // @E
 .Lfunc_end2:
 	.size	E, .Lfunc_end2-E
                                         // -- End function
-	.section	.text.D,"ax",@progbits
+	.section	.text.D,"axR",@progbits
 	.globl	D                               // -- Begin function D
 	.p2align	2
 	.type	D,@function
@@ -243,7 +247,7 @@ A:                                      // @A
 .Lfunc_end5:
 	.size	A, .Lfunc_end5-A
                                         // -- End function
-	.section	.text.merged1,"ax",@progbits
+	.section	.text.merged1,"axR",@progbits
 	.globl	merged1                         // -- Begin function merged1
 	.p2align	2
 	.type	merged1,@function
@@ -377,5 +381,8 @@ d1:
 	.addrsig
 	.addrsig_sym F
 	.addrsig_sym C
+	.addrsig_sym E
+	.addrsig_sym D
 	.addrsig_sym B
 	.addrsig_sym A
+	.addrsig_sym merged1



More information about the llvm-commits mailing list