[lld] 2d48b19 - [lld/mac] Fix mislink with ICF

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 30 15:59:39 PDT 2021


Author: Nico Weber
Date: 2021-10-30T18:58:59-04:00
New Revision: 2d48b19136722f76e467c5e72d36208455f63953

URL: https://github.com/llvm/llvm-project/commit/2d48b19136722f76e467c5e72d36208455f63953
DIFF: https://github.com/llvm/llvm-project/commit/2d48b19136722f76e467c5e72d36208455f63953.diff

LOG: [lld/mac] Fix mislink with ICF

When comparing relocations against two symbols, ICF's equalsConstant() did not
look at the value of the two symbols. With subsections_via_symbols, the value
is usually 0 but not always: In particular, it isn't 0 for constants in string
and literal sections. Since we ignored the value, comparing two constant string
symbols or two literal symbols always compared the 0th's element, so functions
in the same TU always compared as equal.

This can cause mislinks, and, with -dead_strip, crashes.

Fixes PR52349, see that bug for lots of details and examples of mislinks.

While here, make the existing assembly in icf-literals.s a bit more realistic
(use leaq instead of movq with strings, and use foo(%rip) instead of
foo at gotpcrel(%rip)). This has no interesting effect, it just maybe makes the
test look a bit less surprising.

Differential Revision: https://reviews.llvm.org/D112862

Added: 
    

Modified: 
    lld/MachO/ICF.cpp
    lld/test/MachO/icf-literals.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index e583741e30855..57e1da3f9a7f8 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -105,6 +105,9 @@ static bool equalsConstant(const ConcatInputSection *ia,
       return false;
 
     InputSection *isecA, *isecB;
+
+    uint64_t valueA = 0;
+    uint64_t valueB = 0;
     if (ra.referent.is<Symbol *>()) {
       const auto *sa = ra.referent.get<Symbol *>();
       const auto *sb = rb.referent.get<Symbol *>();
@@ -121,7 +124,9 @@ static bool equalsConstant(const ConcatInputSection *ia,
         return da->value == db->value;
       }
       isecA = da->isec;
+      valueA = da->value;
       isecB = db->isec;
+      valueB = db->value;
     } else {
       isecA = ra.referent.get<InputSection *>();
       isecB = rb.referent.get<InputSection *>();
@@ -136,7 +141,8 @@ static bool equalsConstant(const ConcatInputSection *ia,
       return true;
     // Else we have two literal sections. References to them are equal iff their
     // offsets in the output section are equal.
-    return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
+    return isecA->getOffset(valueA + ra.addend) ==
+           isecB->getOffset(valueB + rb.addend);
   };
   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
                     f);

diff  --git a/lld/test/MachO/icf-literals.s b/lld/test/MachO/icf-literals.s
index dbe0490dd6848..908929655195e 100644
--- a/lld/test/MachO/icf-literals.s
+++ b/lld/test/MachO/icf-literals.s
@@ -13,6 +13,10 @@
 # CHECK-NEXT: callq   _baz2_ref
 # CHECK-NEXT: callq   _qux2_ref
 # CHECK-NEXT: callq   _qux2_ref
+# CHECK-NEXT: callq   _sub_str_a_b
+# CHECK-NEXT: callq   _sub_str_b_a
+# CHECK-NEXT: callq   _sub_lit_a_b
+# CHECK-NEXT: callq   _sub_lit_b_a
 
 # CHECK:      [[#%.16x,FOO:]]     l     O __TEXT,__cstring _foo1
 # CHECK-NEXT: [[#%.16x,FOO:]]     l     O __TEXT,__cstring _foo2
@@ -56,21 +60,42 @@ _qux2:
 
 .text
 _foo1_ref:
-  mov _foo1 at GOTPCREL(%rip), %rax
+  leaq _foo1(%rip), %rax
 _foo2_ref:
-  mov _foo2 at GOTPCREL(%rip), %rax
+  leaq _foo2(%rip), %rax
 _bar1_ref:
-  mov _bar1 at GOTPCREL(%rip), %rax
+  leaq _bar1(%rip), %rax
 _bar2_ref:
-  mov _bar2 at GOTPCREL(%rip), %rax
+  leaq _bar2(%rip), %rax
 _baz1_ref:
-  mov _baz1 at GOTPCREL(%rip), %rax
+  movq _baz1(%rip), %rax
 _baz2_ref:
-  mov _baz2 at GOTPCREL(%rip), %rax
+  movq _baz2(%rip), %rax
 _qux1_ref:
-  mov _qux1 at GOTPCREL(%rip), %rax
+  movq _qux1(%rip), %rax
 _qux2_ref:
-  mov _qux2 at GOTPCREL(%rip), %rax
+  movq _qux2(%rip), %rax
+
+## _sub_str_a_b and _sub_str_b_a should not be folded: They contain relocations
+## against the same string symbols, but in a 
diff erent order and hence
+## return 
diff erent numbers.
+_sub_str_a_b:
+  leaq _foo2(%rip), %rdx
+  leaq _bar2(%rip), %rax
+  subq %rdx, %rax
+_sub_str_b_a:
+  leaq _bar2(%rip), %rdx
+  leaq _foo2(%rip), %rax
+  subq %rdx, %rax
+
+## Same with literals instead of strings.
+_sub_lit_a_b:
+  movq _baz2(%rip), %rax
+  subq _qux2(%rip), %rax
+_sub_lit_b_a:
+  movq _qux2(%rip), %rax
+  subq _baz2(%rip), %rax
+
 
 .globl _main
 _main:
@@ -82,5 +107,9 @@ _main:
   callq _baz2_ref
   callq _qux1_ref
   callq _qux2_ref
+  callq _sub_str_a_b
+  callq _sub_str_b_a
+  callq _sub_lit_a_b
+  callq _sub_lit_b_a
 
 .subsections_via_symbols


        


More information about the llvm-commits mailing list