[lld] [lld][ICF] Don't merge symbols with different addends (PR #139493)

Pranav Kant via llvm-commits llvm-commits at lists.llvm.org
Mon May 12 07:28:43 PDT 2025


https://github.com/pranavk updated https://github.com/llvm/llvm-project/pull/139493

>From 9c00cc1e97a3dff809252219d22d7d5650ed51e9 Mon Sep 17 00:00:00 2001
From: Pranav Kant <prka at google.com>
Date: Mon, 12 May 2025 07:28:27 -0700
Subject: [PATCH] [lld][ICF] Don't merge symbols with different addends

---
 lld/ELF/ICF.cpp           | 34 +++++++++++++++++++---------------
 lld/test/ELF/icf-addend.s | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 15 deletions(-)
 create mode 100644 lld/test/ELF/icf-addend.s

diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 1882116d4d5f0..9f56f957ab687 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -334,26 +334,28 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {
              : constantEq(a, ra.relas, b, rb.relas);
 }
 
-template <class RelTy>
-static SmallVector<Symbol *> getReloc(const InputSection *sec,
-                                      Relocs<RelTy> relocs) {
-  SmallVector<Symbol *> syms;
+template <class ELFT, class RelTy>
+static SmallVector<std::pair<Symbol *, uint64_t>>
+getReloc(const InputSection *sec, Relocs<RelTy> relocs) {
+  SmallVector<std::pair<Symbol *, uint64_t>> syms;
   for (auto ri = relocs.begin(), re = relocs.end(); ri != re; ++ri) {
     Symbol &sym = sec->file->getRelocTargetSym(*ri);
-    syms.push_back(&sym);
+    uint64_t addend = getAddend<ELFT>(*ri);
+    syms.push_back(std::make_pair(&sym, addend));
   }
   return syms;
 }
 
 template <class ELFT>
-static SmallVector<Symbol *> getRelocTargetSyms(const InputSection *sec) {
+static SmallVector<std::pair<Symbol *, uint64_t>>
+getRelocTargets(const InputSection *sec) {
   const RelsOrRelas<ELFT> rel = sec->template relsOrRelas<ELFT>();
   if (rel.areRelocsCrel())
-    return getReloc(sec, rel.crels);
+    return getReloc<ELFT>(sec, rel.crels);
   if (rel.areRelocsRel())
-    return getReloc(sec, rel.rels);
+    return getReloc<ELFT>(sec, rel.rels);
 
-  return getReloc(sec, rel.relas);
+  return getReloc<ELFT>(sec, rel.relas);
 }
 
 // Compare two lists of relocations. Returns true if all pairs of
@@ -568,19 +570,21 @@ template <class ELFT> void ICF<ELFT>::run() {
     if (end - begin == 1)
       return;
     print() << "selected section " << sections[begin];
-    SmallVector<Symbol *> syms = getRelocTargetSyms<ELFT>(sections[begin]);
+    SmallVector<std::pair<Symbol *, uint64_t>> syms =
+        getRelocTargets<ELFT>(sections[begin]);
     for (size_t i = begin + 1; i < end; ++i) {
       print() << "  removing identical section " << sections[i];
       sections[begin]->replace(sections[i]);
-      SmallVector<Symbol *> replacedSyms =
-          getRelocTargetSyms<ELFT>(sections[i]);
+      SmallVector<std::pair<Symbol *, uint64_t>> replacedSyms =
+          getRelocTargets<ELFT>(sections[i]);
       assert(syms.size() == replacedSyms.size() &&
              "Should have same number of syms!");
       for (size_t i = 0; i < syms.size(); i++) {
-        if (syms[i] == replacedSyms[i] || !syms[i]->isGlobal() ||
-            !replacedSyms[i]->isGlobal())
+        if (syms[i].first == replacedSyms[i].first ||
+            !syms[i].first->isGlobal() || !replacedSyms[i].first->isGlobal() ||
+            syms[i].second != replacedSyms[i].second)
           continue;
-        symbolEquivalence.unionSets(syms[i], replacedSyms[i]);
+        symbolEquivalence.unionSets(syms[i].first, replacedSyms[i].first);
       }
 
       // At this point we know sections merged are fully identical and hence
diff --git a/lld/test/ELF/icf-addend.s b/lld/test/ELF/icf-addend.s
new file mode 100644
index 0000000000000..0023bbf2ff421
--- /dev/null
+++ b/lld/test/ELF/icf-addend.s
@@ -0,0 +1,33 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o -o /dev/null --icf=all --print-icf-sections | FileCheck %s
+
+# Check that ICF doesn't merge distinct symbols (x and y) even though the
+# functions referencing them fold because symbol+addend is the same address.
+
+# CHECK: selected section {{.*}}:(.text.f1)
+# CHECK:   removing identical section {{.*}}:(.text.f2)
+# CHECK-NOT: redirecting 'y' in symtab to 'x'
+# CHECK-NOT: redirecting 'y' to 'x'
+
+.globl x, y
+
+.section .rodata,"a",@progbits
+x:
+.long 11
+y:
+.long 12
+
+.section .text.f1,"ax",@progbits
+f1:
+movq x+4(%rip), %rax 
+
+.section .text.f2,"ax",@progbits
+f2:
+movq y(%rip), %rax
+
+.section .text._start,"ax",@progbits
+.globl _start
+_start:
+call f1
+call f2



More information about the llvm-commits mailing list