[lld] a650f2e - Revert "[lld-macho] Private label aliases to weak symbols should not retain section data"

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 15 15:43:34 PST 2022


Author: Jez Ng
Date: 2022-12-15T18:43:00-05:00
New Revision: a650f2ec7a37cf1f495108bbb313e948c232c29c

URL: https://github.com/llvm/llvm-project/commit/a650f2ec7a37cf1f495108bbb313e948c232c29c
DIFF: https://github.com/llvm/llvm-project/commit/a650f2ec7a37cf1f495108bbb313e948c232c29c.diff

LOG: Revert "[lld-macho] Private label aliases to weak symbols should not retain section data"

This reverts commit 6736bce6db5fe15bcb765b976c99fff34500d1eb.

It's causing Swift-related crashes in e.g.
https://bugs.chromium.org/p/chromium/issues/detail?id=1400716 and
elsewhere.

Added: 
    

Modified: 
    lld/MachO/InputFiles.cpp
    lld/MachO/UnwindInfoSection.cpp

Removed: 
    lld/test/MachO/weak-def-alias-private-label.s


################################################################################
diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7fb5594aa97cf..54a02f264b851 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -622,12 +622,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
   }
 }
 
-// Symbols with `l` or `L` as a prefix are linker-private and never appear in
-// the output.
-static bool isPrivateLabel(StringRef name) {
-  return name.startswith("l") || name.startswith("L");
-}
-
 template <class NList>
 static macho::Symbol *createDefined(const NList &sym, StringRef name,
                                     InputSection *isec, uint64_t value,
@@ -697,7 +691,8 @@ static macho::Symbol *createDefined(const NList &sym, StringRef name,
   }
   assert(!isWeakDefCanBeHidden &&
          "weak_def_can_be_hidden on already-hidden symbol?");
-  bool includeInSymtab = !isPrivateLabel(name) && !isEhFrameSection(isec);
+  bool includeInSymtab =
+      !name.startswith("l") && !name.startswith("L") && !isEhFrameSection(isec);
   return make<Defined>(
       name, isec->getFile(), isec, value, size, sym.n_desc & N_WEAK_DEF,
       /*isExternal=*/false, /*isPrivateExtern=*/false, includeInSymtab,
@@ -830,25 +825,17 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
     }
     sections[i]->doneSplitting = true;
 
-    auto getSymName = [strtab](const NList& sym) -> StringRef {
-      return StringRef(strtab + sym.n_strx);
-    };
-
     // Calculate symbol sizes and create subsections by splitting the sections
     // along symbol boundaries.
     // We populate subsections by repeatedly splitting the last (highest
     // address) subsection.
     llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
-      // Put private-label symbols after other symbols at the same address.
-      if (nList[lhs].n_value == nList[rhs].n_value)
-        return !isPrivateLabel(getSymName(nList[lhs])) &&
-               isPrivateLabel(getSymName(nList[rhs]));
       return nList[lhs].n_value < nList[rhs].n_value;
     });
     for (size_t j = 0; j < symbolIndices.size(); ++j) {
       const uint32_t symIndex = symbolIndices[j];
       const NList &sym = nList[symIndex];
-      StringRef name = getSymName(sym);
+      StringRef name = strtab + sym.n_strx;
       Subsection &subsec = subsections.back();
       InputSection *isec = subsec.isec;
 
@@ -868,15 +855,8 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
       if (!subsectionsViaSymbols || symbolOffset == 0 ||
           sym.n_desc & N_ALT_ENTRY || !isa<ConcatInputSection>(isec)) {
         isec->hasAltEntry = symbolOffset != 0;
-        // If we have an private-label symbol that's an alias, just reuse the
-        // aliased symbol. Our sorting step above ensures that any such symbols
-        // will appear after the non-private-label ones. See
-        // weak-def-alias-ignored.s for the motivation behind this.
-        if (symbolOffset == 0 && isPrivateLabel(name) && j != 0)
-          symbols[symIndex] = symbols[symbolIndices[j - 1]];
-        else
-          symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
-                                            symbolSize, forceHidden);
+        symbols[symIndex] = createDefined(sym, name, isec, symbolOffset,
+                                          symbolSize, forceHidden);
         continue;
       }
       auto *concatIsec = cast<ConcatInputSection>(isec);

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 470f33523ddb0..8ad2aeb9f2158 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -209,7 +209,7 @@ void UnwindInfoSection::addSymbol(const Defined *d) {
   // If we have multiple symbols at the same address, only one of them can have
   // an associated unwind entry.
   if (!p.second && d->unwindEntry) {
-    assert(p.first->second == d || !p.first->second->unwindEntry);
+    assert(!p.first->second->unwindEntry);
     p.first->second = d;
   }
 }

diff  --git a/lld/test/MachO/weak-def-alias-private-label.s b/lld/test/MachO/weak-def-alias-private-label.s
deleted file mode 100644
index 32067b77867b1..0000000000000
--- a/lld/test/MachO/weak-def-alias-private-label.s
+++ /dev/null
@@ -1,75 +0,0 @@
-# REQUIRES: x86
-## This test checks that when we coalesce weak definitions, any private-label
-## aliases to those weak defs don't cause the coalesced data to be retained.
-## This test explicitly creates those private-label symbols, but it was actually
-## motivated by MC's aarch64 backend which automatically creates them when
-## emitting object files. I've chosen to explicitly create them here since we
-## can then reference those symbols for a more complete test.
-##
-## Not retaining the data matters for more than just size -- we have a use case
-## that depends on proper data coalescing to emit a valid file format.
-##
-## ld64 actually treats all local symbol aliases (not just the private ones) the
-## same way. But implementing this is harder -- we would have to create those
-## symbols first (so we can emit their names later), but we would have to
-## ensure the linker correctly shuffles them around when their aliasees get
-## coalesced. Emulating the behavior of weak binds for non-private symbols would
-## be even trickier. Let's just deal with private-label symbols for now until we
-## find a use case for more general local symbols.
-##
-## Finally, ld64 does all this regardless of whether .subsections_via_symbols is
-## specified. We don't. But again, given how rare the lack of that directive is
-## (I've only seen it from hand-written assembly inputs), I don't think we need
-## to worry about it.
-
-# RUN: rm -rf %t; split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-private.s -o %t/weak-then-private.o
-# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/private-then-weak.s -o %t/private-then-weak.o
-# RUN: %lld -dylib %t/weak-then-private.o %t/private-then-weak.o -o %t/test1
-# RUN: %lld -dylib %t/private-then-weak.o %t/weak-then-private.o -o %t/test2
-# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test1 | FileCheck %s
-# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test2 | FileCheck %s
-
-## Check that we only have one copy of 0x123 in the data, not two.
-# CHECK:       Contents of (__DATA,__data) section
-# CHECK-NEXT:  0000000000001000  23 01 00 00 00 00 00 00 00 10 00 00 00 00 00 00 {{$}}
-# CHECK-NEXT:  0000000000001010  00 10 00 00 00 00 00 00 {{$}}
-# CHECK-EMPTY:
-# CHECK-NEXT:  SYMBOL TABLE:
-# CHECK-NEXT:  0000000000001008 l     O __DATA,__data _ref
-# CHECK-NEXT:  0000000000001010 l     O __DATA,__data _ref
-# CHECK-NEXT:  0000000000001000  w    O __DATA,__data _weak
-# CHECK-NEXT:  0000000000000000         *UND* dyld_stub_binder
-# CHECK-EMPTY:
-## Even though the references were to the non-weak `l_ignored` aliases, we
-## should still emit weak binds as if they were the `_weak` symbol itself.
-# CHECK-NEXT:  Weak bind table:
-# CHECK-NEXT:  segment  section            address     type       addend   symbol
-# CHECK-NEXT:  __DATA   __data             0x00001008 pointer         0   _weak
-# CHECK-NEXT:  __DATA   __data             0x00001010 pointer         0   _weak
-
-#--- weak-then-private.s
-.globl _weak
-.weak_definition _weak
-.data
-_weak:
-l_ignored:
-  .quad 0x123
-
-_ref:
-  .quad l_ignored
-
-.subsections_via_symbols
-
-#--- private-then-weak.s
-.globl _weak
-.weak_definition _weak
-.data
-l_ignored:
-_weak:
-  .quad 0x123
-
-_ref:
-  .quad l_ignored
-
-.subsections_via_symbols


        


More information about the llvm-commits mailing list