[lld] 9cc489a - [lld-macho][nfc] Factor-out NFC changes from main __eh_frame diff

Greg McGary via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 17 14:20:40 PST 2021


Author: Greg McGary
Date: 2021-11-17T15:16:44-07:00
New Revision: 9cc489a4b2b58b70f7b7d84a8544e22f39423ae6

URL: https://github.com/llvm/llvm-project/commit/9cc489a4b2b58b70f7b7d84a8544e22f39423ae6
DIFF: https://github.com/llvm/llvm-project/commit/9cc489a4b2b58b70f7b7d84a8544e22f39423ae6.diff

LOG: [lld-macho][nfc] Factor-out NFC changes from main __eh_frame diff

In order to keep signal:noise high for the `__eh_frame` diff, I have teased-out the NFC changes and put them here.

Differential Revision: https://reviews.llvm.org/D114017

Added: 
    

Modified: 
    lld/MachO/Driver.cpp
    lld/MachO/ICF.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/MarkLive.cpp
    lld/MachO/Symbols.cpp
    lld/MachO/Symbols.h
    lld/MachO/UnwindInfoSection.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 37955c9036f52..7cce6cd3bc351 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1022,15 +1022,17 @@ static void gatherInputSections() {
   int inputOrder = 0;
   for (const InputFile *file : inputFiles) {
     for (const Section &section : file->sections) {
+      const Subsections &subsections = section.subsections;
+      if (subsections.empty())
+        continue;
+      if (subsections[0].isec->getName() == section_names::compactUnwind)
+        // Compact unwind entries require special handling elsewhere.
+        continue;
       ConcatOutputSection *osec = nullptr;
-      for (const Subsection &subsection : section.subsections) {
+      for (const Subsection &subsection : subsections) {
         if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
           if (isec->isCoalescedWeak())
             continue;
-          if (isec->getSegName() == segment_names::ld) {
-            assert(isec->getName() == section_names::compactUnwind);
-            continue;
-          }
           isec->outSecOff = inputOrder++;
           if (!osec)
             osec = ConcatOutputSection::getOrCreateForInput(isec);

diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 6c3a565e74a98..8802f292a9c85 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -187,7 +187,7 @@ static bool equalsVariable(const ConcatInputSection *ia,
   // info matches. For simplicity, we only handle the case where there are only
   // symbols at offset zero within the section (which is typically the case with
   // .subsections_via_symbols.)
-  auto hasCU = [](Defined *d) { return d->compactUnwind; };
+  auto hasCU = [](Defined *d) { return d->unwindEntry != nullptr; };
   auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
   auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
   if (itA == ia->symbols.end())
@@ -196,8 +196,8 @@ static bool equalsVariable(const ConcatInputSection *ia,
     return false;
   const Defined *da = *itA;
   const Defined *db = *itB;
-  if (da->compactUnwind->icfEqClass[icfPass % 2] !=
-          db->compactUnwind->icfEqClass[icfPass % 2] ||
+  if (da->unwindEntry->icfEqClass[icfPass % 2] !=
+          db->unwindEntry->icfEqClass[icfPass % 2] ||
       da->value != 0 || db->value != 0)
     return false;
   auto isZero = [](Defined *d) { return d->value == 0; };
@@ -363,8 +363,8 @@ void macho::foldIdenticalSections() {
     if (isHashable) {
       hashable.push_back(isec);
       for (Defined *d : isec->symbols)
-        if (d->compactUnwind)
-          hashable.push_back(d->compactUnwind);
+        if (d->unwindEntry)
+          hashable.push_back(d->unwindEntry);
     } else {
       isec->icfEqClass[0] = ++icfUniqueID;
     }

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 7bc432296024d..558de4131cb9f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -277,17 +277,16 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
     if (sec.align >= 32) {
       error("alignment " + std::to_string(sec.align) + " of section " + name +
             " is too large");
-      sections.push_back({});
+      sections.push_back(sec.addr);
       continue;
     }
     uint32_t align = 1 << sec.align;
     uint32_t flags = sec.flags;
 
     auto splitRecords = [&](int recordSize) -> void {
-      sections.push_back({});
+      sections.push_back(sec.addr);
       if (data.empty())
         return;
-
       Subsections &subsections = sections.back().subsections;
       subsections.reserve(data.size() / recordSize);
       auto *isec = make<ConcatInputSection>(
@@ -319,10 +318,12 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
         isec = make<WordLiteralInputSection>(segname, name, this, data, align,
                                              flags);
       }
-      sections.push_back({});
+      sections.push_back(sec.addr);
       sections.back().subsections.push_back({0, isec});
     } else if (auto recordSize = getRecordSize(segname, name)) {
       splitRecords(*recordSize);
+      if (name == section_names::compactUnwind)
+        compactUnwindSection = &sections.back();
     } else if (segname == segment_names::llvm) {
       // ld64 does not appear to emit contents from sections within the __LLVM
       // segment. Symbols within those sections point to bitcode metadata
@@ -330,7 +331,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
       // have the same name without causing duplicate symbol errors. Push an
       // empty entry to ensure indices line up for the remaining sections.
       // TODO: Evaluate whether the bitcode metadata is needed.
-      sections.push_back({});
+      sections.push_back(sec.addr);
     } else {
       auto *isec =
           make<ConcatInputSection>(segname, name, this, data, align, flags);
@@ -340,10 +341,10 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
         // object files that contain them. We filter them out early to avoid
         // parsing their relocations unnecessarily. But we must still push an
         // empty entry to ensure the indices line up for the remaining sections.
-        sections.push_back({});
+        sections.push_back(sec.addr);
         debugSections.push_back(isec);
       } else {
-        sections.push_back({});
+        sections.push_back(sec.addr);
         sections.back().subsections.push_back({0, isec});
       }
     }
@@ -358,7 +359,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
 // same location as an offset relative to the start of the containing
 // subsection.
 template <class T>
-static InputSection *findContainingSubsection(Subsections &subsections,
+static InputSection *findContainingSubsection(const Subsections &subsections,
                                               T *offset) {
   static_assert(std::is_same<uint64_t, T>::value ||
                     std::is_same<uint32_t, T>::value,
@@ -437,8 +438,17 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
     // and insert them. Storing addends in the instruction stream is
     // possible, but inconvenient and more costly at link time.
 
-    int64_t pairedAddend = 0;
     relocation_info relInfo = relInfos[i];
+    bool isSubtrahend =
+        target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
+    if (isSubtrahend && StringRef(sec.sectname) == section_names::ehFrame) {
+      // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
+      // adds local "EH_Frame1" and "func.eh". Ignore them because they have
+      // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
+      ++i;
+      continue;
+    }
+    int64_t pairedAddend = 0;
     if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) {
       pairedAddend = SignExtend64<24>(relInfo.r_symbolnum);
       relInfo = relInfos[++i];
@@ -449,8 +459,6 @@ void ObjFile::parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
     if (relInfo.r_address & R_SCATTERED)
       fatal("TODO: Scattered relocations not supported");
 
-    bool isSubtrahend =
-        target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND);
     int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
     assert(!(embeddedAddend && pairedAddend));
     int64_t totalAddend = pairedAddend + embeddedAddend;
@@ -691,12 +699,17 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
     Subsections &subsections = sections[i].subsections;
     if (subsections.empty())
       continue;
-
+    InputSection *lastIsec = subsections.back().isec;
+    if (lastIsec->getName() == section_names::ehFrame) {
+      // __TEXT,__eh_frame only has symbols and SUBTRACTOR relocs when ld64 -r
+      // adds local "EH_Frame1" and "func.eh". Ignore them because they have
+      // gone unused by Mac OS since Snow Leopard (10.6), vintage 2009.
+      continue;
+    }
     std::vector<uint32_t> &symbolIndices = symbolsBySection[i];
     uint64_t sectionAddr = sectionHeaders[i].addr;
     uint32_t sectionAlign = 1u << sectionHeaders[i].align;
 
-    InputSection *lastIsec = subsections.back().isec;
     // Record-based sections have already been split into subsections during
     // parseSections(), so we simply need to match Symbols to the corresponding
     // subsection here.
@@ -725,11 +738,11 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
     llvm::stable_sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) {
       return nList[lhs].n_value < nList[rhs].n_value;
     });
-    Subsection subsec = subsections.back();
     for (size_t j = 0; j < symbolIndices.size(); ++j) {
       uint32_t symIndex = symbolIndices[j];
       const NList &sym = nList[symIndex];
       StringRef name = strtab + sym.n_strx;
+      Subsection &subsec = subsections.back();
       InputSection *isec = subsec.isec;
 
       uint64_t subsecAddr = sectionAddr + subsec.offset;
@@ -773,7 +786,6 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
       // emulating that behavior.
       nextIsec->align = MinAlign(sectionAlign, sym.n_value);
       subsections.push_back({sym.n_value - sectionAddr, nextIsec});
-      subsec = subsections.back();
     }
   }
 
@@ -799,7 +811,7 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
       make<ConcatInputSection>(segName.take_front(16), sectName.take_front(16),
                                /*file=*/this, data);
   isec->live = true;
-  sections.push_back({});
+  sections.push_back(0);
   sections.back().subsections.push_back({0, isec});
 }
 
@@ -869,7 +881,8 @@ template <class LP> void ObjFile::parse() {
   parseDebugInfo();
   if (config->emitDataInCodeInfo)
     parseDataInCode();
-  registerCompactUnwind();
+  if (compactUnwindSection)
+    registerCompactUnwind();
 }
 
 void ObjFile::parseDebugInfo() {
@@ -912,20 +925,7 @@ void ObjFile::parseDataInCode() {
 
 // Create pointers from symbols to their associated compact unwind entries.
 void ObjFile::registerCompactUnwind() {
-  // First, locate the __compact_unwind section.
-  Section *cuSection = nullptr;
-  for (Section &section : sections) {
-    if (section.subsections.empty())
-      continue;
-    if (section.subsections[0].isec->getSegName() != segment_names::ld)
-      continue;
-    cuSection = §ion;
-    break;
-  }
-  if (!cuSection)
-    return;
-
-  for (Subsection &subsection : cuSection->subsections) {
+  for (const Subsection &subsection : compactUnwindSection->subsections) {
     ConcatInputSection *isec = cast<ConcatInputSection>(subsection.isec);
     // Hack!! Since each CUE contains a 
diff erent function address, if ICF
     // operated naively and compared the entire contents of each CUE, entries
@@ -939,7 +939,7 @@ void ObjFile::registerCompactUnwind() {
     ConcatInputSection *referentIsec;
     for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
       Reloc &r = *it;
-      // We only wish to handle the relocation for CUE::functionAddress.
+      // CUE::functionAddress is at offset 0. Skip personality & LSDA relocs.
       if (r.offset != 0) {
         ++it;
         continue;
@@ -974,7 +974,7 @@ void ObjFile::registerCompactUnwind() {
         ++it;
         continue;
       }
-      (*symIt)->compactUnwind = isec;
+      (*symIt)->unwindEntry = isec;
       // Since we've sliced away the functionAddress, we should remove the
       // corresponding relocation too. Given that clang emits relocations in
       // reverse order of address, this relocation should be at the end of the

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 2c870b78316fd..93794cb5a4aae 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -41,6 +41,7 @@ namespace macho {
 struct PlatformInfo;
 class ConcatInputSection;
 class Symbol;
+class Defined;
 struct Reloc;
 enum class RefState : uint8_t;
 
@@ -59,8 +60,9 @@ struct Subsection {
 using Subsections = std::vector<Subsection>;
 
 struct Section {
-  // uint64_t address = 0; // TODO(gkm): this will debut with __eh_frame handler
+  uint64_t address = 0;
   Subsections subsections;
+  Section(uint64_t addr) : address(addr){};
 };
 
 class InputFile {
@@ -114,6 +116,8 @@ class ObjFile final : public InputFile {
   ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
 
 private:
+  Section *compactUnwindSection = nullptr;
+
   template <class LP> void parse();
   template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
   template <class LP>

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 0dbe6a619e985..96167b72a724f 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -120,10 +120,11 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
   it = symbols.begin();
   uint64_t v = (*it)->value;
   for (++it; it != symbols.end(); ++it) {
-    if ((*it)->value == v)
-      (*it)->compactUnwind = nullptr;
+    Defined *d = *it;
+    if (d->value == v)
+      d->unwindEntry = nullptr;
     else
-      v = (*it)->value;
+      v = d->value;
   }
 }
 

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e8aefc16f6adb..1183e32fbabf9 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -298,6 +298,7 @@ constexpr const char debugAbbrev[] = "__debug_abbrev";
 constexpr const char debugInfo[] = "__debug_info";
 constexpr const char debugStr[] = "__debug_str";
 constexpr const char ehFrame[] = "__eh_frame";
+constexpr const char gccExceptTab[] = "__gcc_except_tab";
 constexpr const char export_[] = "__export";
 constexpr const char dataInCode[] = "__data_in_code";
 constexpr const char functionStarts[] = "__func_starts";

diff  --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index e4aba8fb90d42..4269c8342c656 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -51,8 +51,8 @@ void markLive() {
     if (auto *d = dyn_cast<Defined>(s)) {
       if (d->isec)
         enqueue(d->isec, d->value);
-      if (d->compactUnwind)
-        enqueue(d->compactUnwind, 0);
+      if (d->unwindEntry)
+        enqueue(d->unwindEntry, 0);
     }
   };
 

diff  --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 35b6d040038f0..bb6d073dcf308 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -95,8 +95,8 @@ uint64_t Defined::getVA() const {
 }
 
 void Defined::canonicalize() {
-  if (compactUnwind)
-    compactUnwind = compactUnwind->canonical();
+  if (unwindEntry)
+    unwindEntry = unwindEntry->canonical();
   if (isec)
     isec = isec->canonical();
 }

diff  --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index 2b44711861bb5..cc6f51cc5fd3a 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -168,7 +168,7 @@ class Defined : public Symbol {
   uint64_t value;
   // size is only calculated for regular (non-bitcode) symbols.
   uint64_t size;
-  ConcatInputSection *compactUnwind = nullptr;
+  ConcatInputSection *unwindEntry = nullptr;
 };
 
 // This enum does double-duty: as a symbol property, it indicates whether & how

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 149bb10cb9086..690098c7a3b78 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -154,8 +154,8 @@ void UnwindInfoSection::prepareRelocations() {
   // entries to the GOT. Hence the use of a MapVector for
   // UnwindInfoSection::symbols.
   for (const Defined *d : make_second_range(symbols))
-    if (d->compactUnwind)
-      prepareRelocations(d->compactUnwind);
+    if (d->unwindEntry)
+      prepareRelocations(d->unwindEntry);
 }
 
 // Record function symbols that may need entries emitted in __unwind_info, which
@@ -169,7 +169,7 @@ void UnwindInfoSection::prepareRelocations() {
 // symbols for each unique address regardless of whether they have associated
 // unwind info.
 void UnwindInfoSection::addSymbol(const Defined *d) {
-  if (d->compactUnwind)
+  if (d->unwindEntry)
     allEntriesAreOmitted = false;
   // We don't yet know the final output address of this symbol, but we know that
   // they are uniquely determined by a combination of the isec and value, so
@@ -177,8 +177,8 @@ void UnwindInfoSection::addSymbol(const Defined *d) {
   auto p = symbols.insert({{d->isec, d->value}, d});
   // If we have multiple symbols at the same address, only one of them can have
   // an associated CUE.
-  if (!p.second && d->compactUnwind) {
-    assert(!p.first->second->compactUnwind);
+  if (!p.second && d->unwindEntry) {
+    assert(!p.first->second->unwindEntry);
     p.first->second = d;
   }
 }
@@ -202,11 +202,19 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
     Reloc &r = isec->relocs[i];
     assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
 
+    // Functions and LSDA entries always reside in the same object file as the
+    // compact unwind entries that references them, and thus appear as section
+    // relocs. There is no need to prepare them. We only prepare relocs for
+    // personality functions.
     if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
         offsetof(CompactUnwindEntry<Ptr>, personality))
       continue;
 
     if (auto *s = r.referent.dyn_cast<Symbol *>()) {
+      // Personality functions are nearly always system-defined (e.g.,
+      // ___gxx_personality_v0 for C++) and relocated as dylib symbols.  When an
+      // application provides its own personality function, it might be
+      // referenced by an extern Defined symbol reloc, or a local section reloc.
       if (auto *defined = dyn_cast<Defined>(s)) {
         // XXX(vyng) This is a a special case for handling duplicate personality
         // symbols. Note that LD64's behavior is a bit 
diff erent and it is
@@ -291,13 +299,13 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
     const Defined *d = symbolsVec[i].second;
     // Write the functionAddress.
     writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
-    if (!d->compactUnwind)
+    if (!d->unwindEntry)
       return;
 
     // Write the rest of the CUE.
-    memcpy(buf + sizeof(Ptr), d->compactUnwind->data.data(),
-           d->compactUnwind->data.size());
-    for (const Reloc &r : d->compactUnwind->relocs) {
+    memcpy(buf + sizeof(Ptr), d->unwindEntry->data.data(),
+           d->unwindEntry->data.size());
+    for (const Reloc &r : d->unwindEntry->relocs) {
       uint64_t referentVA = 0;
       if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
         if (!isa<Undefined>(referentSym)) {
@@ -432,9 +440,8 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
       // So we check their relocations instead.
       // FIXME: should we account for an LSDA at an absolute address? ld64 seems
       // to support it, but it seems unlikely to be used in practice.
-      Reloc *lsda1 =
-          findLsdaReloc(symbolsVec[*foldBegin].second->compactUnwind);
-      Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->compactUnwind);
+      Reloc *lsda1 = findLsdaReloc(symbolsVec[*foldBegin].second->unwindEntry);
+      Reloc *lsda2 = findLsdaReloc(symbolsVec[*foldEnd].second->unwindEntry);
       if (lsda1 == nullptr && lsda2 == nullptr)
         continue;
       if (lsda1 == nullptr || lsda2 == nullptr)
@@ -536,7 +543,8 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
 
   for (size_t idx : cuIndices) {
     lsdaIndex[idx] = entriesWithLsda.size();
-    if (findLsdaReloc(symbolsVec[idx].second->compactUnwind))
+    const Defined *d = symbolsVec[idx].second;
+    if (findLsdaReloc(d->unwindEntry))
       entriesWithLsda.push_back(idx);
   }
 
@@ -614,7 +622,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const {
   for (size_t idx : entriesWithLsda) {
     const CompactUnwindEntry<Ptr> &cu = cuEntries[idx];
     const Defined *d = symbolsVec[idx].second;
-    if (Reloc *r = findLsdaReloc(d->compactUnwind)) {
+    if (Reloc *r = findLsdaReloc(d->unwindEntry)) {
       uint64_t va;
       if (auto *isec = r->referent.dyn_cast<InputSection *>()) {
         va = isec->getVA(r->addend);


        


More information about the llvm-commits mailing list