[lld] Merge equivalent symbols found during ICF (PR #134342)

Pranav Kant via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 3 21:06:32 PDT 2025


https://github.com/pranavk updated https://github.com/llvm/llvm-project/pull/134342

>From 07701ce112b5a6dcd5f5284d2d811762c9abce93 Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Fri, 4 Apr 2025 03:44:16 +0000
Subject: [PATCH 1/2] [lld] Merge equivalent symbols found during ICF

---
 lld/ELF/ICF.cpp                        | 51 ++++++++++++++++++-
 lld/ELF/SymbolTable.cpp                |  7 +++
 lld/ELF/SymbolTable.h                  |  1 +
 lld/test/ELF/aarch64-got-merging-icf.s | 68 ++++++++++++++++++++++++++
 lld/test/ELF/icf-preemptible.s         |  3 ++
 5 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 lld/test/ELF/aarch64-got-merging-icf.s

diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 1cdcf6be9d8a9..467487e10f310 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -333,6 +333,28 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
              : constantEq(a, ra.relas, b, rb.relas);
 }
 
+template <class RelTy>
+static SmallVector<Symbol *> getReloc(const InputSection *sec,
+                                      Relocs<RelTy> relocs) {
+  SmallVector<Symbol *> syms;
+  for (auto ri = relocs.begin(), re = relocs.end(); ri != re; ++ri) {
+    Symbol &sym = sec->file->getRelocTargetSym(*ri);
+    syms.push_back(&sym);
+  }
+  return syms;
+}
+
+template <class ELFT>
+static SmallVector<Symbol *> getRelocTargetSyms(const InputSection *sec) {
+  const RelsOrRelas<ELFT> rel = sec->template relsOrRelas<ELFT>();
+  if (rel.areRelocsCrel())
+    return getReloc(sec, rel.crels);
+  if (rel.areRelocsRel())
+    return getReloc(sec, rel.rels);
+
+  return getReloc(sec, rel.relas);
+}
+
 // Compare two lists of relocations. Returns true if all pairs of
 // relocations point to the same section in terms of ICF.
 template <class ELFT>
@@ -535,14 +557,35 @@ template <class ELFT> void ICF<ELFT>::run() {
   auto print = [&ctx = ctx]() -> ELFSyncStream {
     return {ctx, ctx.arg.printIcfSections ? DiagLevel::Msg : DiagLevel::None};
   };
+
+  DenseMap<Symbol *, Symbol *> symbolMap;
   // Merge sections by the equivalence class.
+  // Merge symbols identified as equivalent during ICF
   forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) {
     if (end - begin == 1)
       return;
     print() << "selected section " << sections[begin];
+    SmallVector<Symbol *> syms = getRelocTargetSyms<ELFT>(sections[begin]);
     for (size_t i = begin + 1; i < end; ++i) {
       print() << "  removing identical section " << sections[i];
       sections[begin]->replace(sections[i]);
+      SmallVector<Symbol *> replacedSyms =
+          getRelocTargetSyms<ELFT>(sections[i]);
+      assert(syms.size() == replacedSyms.size() &&
+             "Should have same number of syms!");
+      for (size_t i = 0; i < syms.size(); i++) {
+        if (syms[i] == replacedSyms[i] || !syms[i]->isGlobal() ||
+            !replacedSyms[i]->isGlobal())
+          continue;
+        auto [it, inserted] =
+            symbolMap.insert(std::make_pair(replacedSyms[i], syms[i]));
+        print() << "  selected symbol: " << syms[i]->getName().data()
+                << "; replaced symbol: " << replacedSyms[i]->getName().data();
+        if (!inserted) {
+          print() << "    replacement already exists: "
+                  << it->getSecond()->getName().data();
+        }
+      }
 
       // At this point we know sections merged are fully identical and hence
       // we want to remove duplicate implicit dependencies such as link order
@@ -561,11 +604,17 @@ template <class ELFT> void ICF<ELFT>::run() {
           d->folded = true;
         }
   };
-  for (Symbol *sym : ctx.symtab->getSymbols())
+  for (Symbol *sym : ctx.symtab->getSymbols()) {
     fold(sym);
+    if (Symbol *s = symbolMap.lookup(sym))
+      ctx.symtab->redirect(sym, s);
+  }
   parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) {
     for (Symbol *sym : file->getLocalSymbols())
       fold(sym);
+    for (Symbol *&sym : file->getMutableGlobalSymbols())
+      if (Symbol *s = symbolMap.lookup(sym))
+        sym = s;
   });
 
   // InputSectionDescription::sections is populated by processSectionCommands().
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index b8a70d4e898fc..2199f159692b0 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -29,6 +29,13 @@ using namespace llvm::ELF;
 using namespace lld;
 using namespace lld::elf;
 
+void SymbolTable::redirect(Symbol *from, Symbol *to) {
+  int &fromIdx = symMap[CachedHashStringRef(from->getName())];
+  const int toIdx = symMap[CachedHashStringRef(to->getName())];
+
+  fromIdx = toIdx;
+}
+
 void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
   // Redirect __real_foo to the original foo and foo to the original __wrap_foo.
   int &idx1 = symMap[CachedHashStringRef(sym->getName())];
diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h
index d6443742f7baa..e3a39bac85f97 100644
--- a/lld/ELF/SymbolTable.h
+++ b/lld/ELF/SymbolTable.h
@@ -41,6 +41,7 @@ class SymbolTable {
   SymbolTable(Ctx &ctx) : ctx(ctx) {}
   ArrayRef<Symbol *> getSymbols() const { return symVector; }
 
+  void redirect(Symbol *from, Symbol *to);
   void wrap(Symbol *sym, Symbol *real, Symbol *wrap);
 
   Symbol *insert(StringRef name);
diff --git a/lld/test/ELF/aarch64-got-merging-icf.s b/lld/test/ELF/aarch64-got-merging-icf.s
new file mode 100644
index 0000000000000..9f359cbf3a0a9
--- /dev/null
+++ b/lld/test/ELF/aarch64-got-merging-icf.s
@@ -0,0 +1,68 @@
+// REQUIRES: aarch64
+
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t
+# RUN: ld.lld %t -o %t2 --icf=all
+# RUN: llvm-objdump --section-headers %t2 | FileCheck %s --check-prefix=EXE
+
+# RUN: ld.lld -shared %t -o %t3 --icf=all
+# RUN: llvm-objdump --section-headers %t3 | FileCheck %s --check-prefix=DSO
+
+## All .rodata.* sections should merge into a single GOT entry
+# EXE: {{.*}}.got 00000008{{.*}}
+
+## When symbols are preemptible in DSO mode, GOT entries wouldn't be merged
+# DSO: {{.*}}.got 00000020{{.*}}
+
+.addrsig
+
+callee:
+ret
+
+.section .rodata.dummy1,"a",@progbits
+sym1:
+.long 111
+.long 122
+.byte 123
+
+.section .rodata.dummy2,"a",@progbits
+sym2:
+.long 111
+.long 122
+sym3:
+.byte 123
+
+.macro f, index
+
+.section .text.f1_\index,"ax",@progbits
+f1_\index:
+adrp x0, :got:g\index
+mov x1, #\index
+b f2_\index
+
+.section .text.f2_\index,"ax",@progbits
+f2_\index:
+ldr x0, [x0, :got_lo12:g\index] 
+b callee
+
+.globl g\index
+.section .rodata.g\index,"a",@progbits
+g_\index:
+.long 111
+.long 122
+
+g\index:
+.byte 123
+
+.section .text._start,"ax",@progbits
+bl f1_\index
+
+.endm
+
+.section .text._start,"ax",@progbits
+.globl _start
+_start:
+
+f 0
+f 1
+f 2
+f 3
diff --git a/lld/test/ELF/icf-preemptible.s b/lld/test/ELF/icf-preemptible.s
index 4bd1eca438b19..f79cf73b911ba 100644
--- a/lld/test/ELF/icf-preemptible.s
+++ b/lld/test/ELF/icf-preemptible.s
@@ -11,12 +11,15 @@
 # EXE-NOT:  {{.}}
 # EXE:      selected section {{.*}}:(.text.g1)
 # EXE-NEXT:   removing identical section {{.*}}:(.text.g2)
+# EXE-NEXT:   selected symbol: f1; replaced symbol: f2
 # EXE-NEXT:   removing identical section {{.*}}:(.text.g3)
 # EXE-NEXT: selected section {{.*}}:(.text.f1)
 # EXE-NEXT:   removing identical section {{.*}}:(.text.f2)
 # EXE-NEXT: selected section {{.*}}:(.text.h1)
 # EXE-NEXT:   removing identical section {{.*}}:(.text.h2)
+# EXE-NEXT:   selected symbol: g1; replaced symbol: g2
 # EXE-NEXT:   removing identical section {{.*}}:(.text.h3)
+# EXE-NEXT:   selected symbol: g1; replaced symbol: g3
 # EXE-NOT:  {{.}}
 
 ## Definitions are preemptible in a DSO. Only leaf functions can be folded.

>From 7e2cfb4ab12391378199bfb3fb2bbb433f8e59cf Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Fri, 4 Apr 2025 04:06:17 +0000
Subject: [PATCH 2/2] add comments to test case

---
 lld/test/ELF/aarch64-got-merging-icf.s | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lld/test/ELF/aarch64-got-merging-icf.s b/lld/test/ELF/aarch64-got-merging-icf.s
index 9f359cbf3a0a9..c5717106d582a 100644
--- a/lld/test/ELF/aarch64-got-merging-icf.s
+++ b/lld/test/ELF/aarch64-got-merging-icf.s
@@ -33,17 +33,20 @@ sym3:
 
 .macro f, index
 
+# (Kept unique) first instruction of the GOT code sequence
 .section .text.f1_\index,"ax",@progbits
 f1_\index:
 adrp x0, :got:g\index
 mov x1, #\index
 b f2_\index
 
+# Folded, second instruction of the GOT code sequence
 .section .text.f2_\index,"ax",@progbits
 f2_\index:
 ldr x0, [x0, :got_lo12:g\index] 
 b callee
 
+# Folded
 .globl g\index
 .section .rodata.g\index,"a",@progbits
 g_\index:



More information about the llvm-commits mailing list