[lld] a9353db - [lld-macho] Simplify the handling of "no unwind info" functions

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 26 13:04:49 PDT 2021


Author: Jez Ng
Date: 2021-10-26T16:04:16-04:00
New Revision: a9353dbe517c522ac63818de2cde645acdc46834

URL: https://github.com/llvm/llvm-project/commit/a9353dbe517c522ac63818de2cde645acdc46834
DIFF: https://github.com/llvm/llvm-project/commit/a9353dbe517c522ac63818de2cde645acdc46834.diff

LOG: [lld-macho] Simplify the handling of "no unwind info" functions

This diff does away with `addEntriesForFunctionsWithoutUnwindInfo()`,
because `addSymbol()` can now determine which functions need those
entries.

While overhauling UnwindInfoSection, I also parallelized the relocation
of the contents of the CUEs. This somewhat offsets the time regression
from creating one InputSection per CUE (which was done in D109944).

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D109945

Added: 
    

Modified: 
    lld/MachO/UnwindInfoSection.cpp
    lld/MachO/UnwindInfoSection.h
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 6ffd35ffd033..dca537493de4 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/Parallel.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
@@ -92,6 +93,14 @@ using namespace lld::macho;
 // TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
 // TODO(gkm): how do we align the 2nd-level pages?
 
+template <class Ptr> struct CompactUnwindEntry {
+  Ptr functionAddress;
+  uint32_t functionLength;
+  compact_unwind_encoding_t encoding;
+  Ptr personality;
+  Ptr lsda;
+};
+
 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
 
 struct SecondLevelPage {
@@ -107,7 +116,7 @@ template <class Ptr>
 class UnwindInfoSectionImpl final : public UnwindInfoSection {
 public:
   void prepareRelocations(ConcatInputSection *) override;
-  void addSymbol(const Defined *) override;
+  void relocateCompactUnwind(std::vector<CompactUnwindEntry<Ptr>> &);
   void finalize() override;
   void writeTo(uint8_t *buf) const override;
 
@@ -131,20 +140,39 @@ class UnwindInfoSectionImpl final : public UnwindInfoSection {
 UnwindInfoSection::UnwindInfoSection()
     : SyntheticSection(segment_names::text, section_names::unwindInfo) {
   align = 4;
-  compactUnwindSection =
-      make<ConcatOutputSection>(section_names::compactUnwind);
 }
 
 void UnwindInfoSection::prepareRelocations() {
-  for (ConcatInputSection *isec : compactUnwindSection->inputs)
-    prepareRelocations(isec);
+  // This iteration needs to be deterministic, since prepareRelocations may add
+  // entries to the GOT. Hence the use of a MapVector for
+  // UnwindInfoSection::symbols.
+  for (const Defined *d : make_second_range(symbols))
+    if (d->compactUnwind)
+      prepareRelocations(d->compactUnwind);
 }
 
-template <class Ptr>
-void UnwindInfoSectionImpl<Ptr>::addSymbol(const Defined *d) {
-  if (d->compactUnwind) {
-    d->compactUnwind->parent = compactUnwindSection;
-    compactUnwindSection->addInput(d->compactUnwind);
+// Record function symbols that may need entries emitted in __unwind_info, which
+// stores unwind data for address ranges.
+//
+// Note that if several adjacent functions have the same unwind encoding, LSDA,
+// and personality function, they share one unwind entry. For this to work,
+// functions without unwind info need explicit "no unwind info" unwind entries
+// -- else the unwinder would think they have the unwind info of the closest
+// function with unwind info right before in the image. Thus, we add function
+// symbols for each unique address regardless of whether they have associated
+// unwind info.
+void UnwindInfoSection::addSymbol(const Defined *d) {
+  if (d->compactUnwind)
+    allEntriesAreOmitted = false;
+  // We don't yet know the final output address of this symbol, but we know that
+  // they are uniquely determined by a combination of the isec and value, so
+  // we use that as the key here.
+  auto p = symbols.insert({{d->isec, d->value}, d});
+  // If we have multiple symbols at the same address, only one of them can have
+  // an associated CUE.
+  if (!p.second && d->compactUnwind) {
+    assert(!p.first->second->compactUnwind);
+    p.first->second = d;
   }
 }
 
@@ -167,18 +195,6 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
     Reloc &r = isec->relocs[i];
     assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
 
-    if (r.offset % sizeof(CompactUnwindEntry<Ptr>) == 0) {
-      InputSection *referentIsec;
-      if (auto *isec = r.referent.dyn_cast<InputSection *>())
-        referentIsec = isec;
-      else
-        referentIsec = cast<Defined>(r.referent.dyn_cast<Symbol *>())->isec;
-
-      if (!cast<ConcatInputSection>(referentIsec)->shouldOmitFromOutput())
-        allEntriesAreOmitted = false;
-      continue;
-    }
-
     if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
         offsetof(CompactUnwindEntry<Ptr>, personality))
       continue;
@@ -260,17 +276,21 @@ static ConcatInputSection *checkTextSegment(InputSection *isec) {
 // relocations here: since we are not emitting the pre-link CU section, there
 // is no source address to make a relative location meaningful.
 template <class Ptr>
-static void
-relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
-                      std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
-  for (const ConcatInputSection *isec : compactUnwindSection->inputs) {
-    assert(isec->parent == compactUnwindSection);
-
-    uint8_t *buf =
-        reinterpret_cast<uint8_t *>(cuVector.data()) + isec->outSecOff;
-    memcpy(buf, isec->data.data(), isec->data.size());
-
-    for (const Reloc &r : isec->relocs) {
+void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
+    std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
+  auto symbolsVec = symbols.takeVector();
+  parallelForEachN(0, symbolsVec.size(), [&](size_t i) {
+    uint8_t *buf = reinterpret_cast<uint8_t *>(cuVector.data()) +
+                   i * sizeof(CompactUnwindEntry<Ptr>);
+    const Defined *d = symbolsVec[i].second;
+    // Write the functionAddress.
+    writeAddress(buf, d->getVA(), sizeof(Ptr) == 8 ? 3 : 2);
+    if (!d->compactUnwind)
+      return;
+
+    // Write the rest of the CUE.
+    memcpy(buf, d->compactUnwind->data.data(), d->compactUnwind->data.size());
+    for (const Reloc &r : d->compactUnwind->relocs) {
       uint64_t referentVA = 0;
       if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
         if (!isa<Undefined>(referentSym)) {
@@ -289,7 +309,7 @@ relocateCompactUnwind(ConcatOutputSection *compactUnwindSection,
       }
       writeAddress(buf + r.offset, referentVA, r.length);
     }
-  }
+  });
 }
 
 // There should only be a handful of unique personality pointers, so we can
@@ -319,39 +339,6 @@ encodePersonalities(const std::vector<CompactUnwindEntry<Ptr> *> &cuPtrVector,
           ") for compact unwind to encode");
 }
 
-// __unwind_info stores unwind data for address ranges. If several
-// adjacent functions have the same unwind encoding, LSDA, and personality
-// function, they share one unwind entry. For this to work, functions without
-// unwind info need explicit "no unwind info" unwind entries -- else the
-// unwinder would think they have the unwind info of the closest function
-// with unwind info right before in the image.
-template <class Ptr>
-static void addEntriesForFunctionsWithoutUnwindInfo(
-    std::vector<CompactUnwindEntry<Ptr>> &cuVector) {
-  DenseSet<Ptr> hasUnwindInfo;
-  for (CompactUnwindEntry<Ptr> &cuEntry : cuVector)
-    hasUnwindInfo.insert(cuEntry.functionAddress);
-
-  // Add explicit "has no unwind info" entries for all global and local symbols
-  // without unwind info.
-  auto markNoUnwindInfo = [&cuVector, &hasUnwindInfo](const Defined *d) {
-    if (d->isLive() && d->isec && isCodeSection(d->isec)) {
-      Ptr ptr = d->getVA();
-      if (!hasUnwindInfo.count(ptr))
-        cuVector.push_back({ptr, 0, 0, 0, 0});
-    }
-  };
-  for (Symbol *sym : symtab->getSymbols())
-    if (auto *d = dyn_cast<Defined>(sym))
-      markNoUnwindInfo(d);
-  for (const InputFile *file : inputFiles)
-    if (auto *objFile = dyn_cast<ObjFile>(file))
-      for (Symbol *sym : objFile->symbols)
-        if (auto *d = dyn_cast_or_null<Defined>(sym))
-          if (!d->isExternal())
-            markNoUnwindInfo(d);
-}
-
 static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
   // From compact_unwind_encoding.h:
   //  UNWIND_X86_64_MODE_STACK_IND:
@@ -380,7 +367,7 @@ static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
 // Scan the __LD,__compact_unwind entries and compute the space needs of
 // __TEXT,__unwind_info and __TEXT,__eh_frame
 template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
-  if (compactUnwindSection == nullptr)
+  if (symbols.empty())
     return;
 
   // At this point, the address space for __TEXT,__text has been
@@ -390,15 +377,8 @@ template <class Ptr> void UnwindInfoSectionImpl<Ptr>::finalize() {
   // we can fold adjacent CU entries with identical
   // encoding+personality+lsda. Folding is necessary because it reduces
   // the number of CU entries by as much as 3 orders of magnitude!
-  compactUnwindSection->finalize();
-  assert(compactUnwindSection->getSize() % sizeof(CompactUnwindEntry<Ptr>) ==
-         0);
-  size_t cuCount =
-      compactUnwindSection->getSize() / sizeof(CompactUnwindEntry<Ptr>);
-  cuVector.resize(cuCount);
-  relocateCompactUnwind(compactUnwindSection, cuVector);
-
-  addEntriesForFunctionsWithoutUnwindInfo(cuVector);
+  cuVector.resize(symbols.size());
+  relocateCompactUnwind(cuVector);
 
   // Rather than sort & fold the 32-byte entries directly, we create a
   // vector of pointers to entries and sort & fold that instead.

diff  --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 864f2db29dfa..f0d4690b94f9 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -11,37 +11,29 @@
 
 #include "ConcatOutputSection.h"
 #include "SyntheticSections.h"
+#include "llvm/ADT/MapVector.h"
 
 #include "mach-o/compact_unwind_encoding.h"
 
 namespace lld {
 namespace macho {
 
-template <class Ptr> struct CompactUnwindEntry {
-  Ptr functionAddress;
-  uint32_t functionLength;
-  compact_unwind_encoding_t encoding;
-  Ptr personality;
-  Ptr lsda;
-};
-
 class UnwindInfoSection : public SyntheticSection {
 public:
-  bool isNeeded() const override {
-    return !compactUnwindSection->inputs.empty() && !allEntriesAreOmitted;
-  }
+  // If all functions are free of unwind info, we can omit the unwind info
+  // section entirely.
+  bool isNeeded() const override { return !allEntriesAreOmitted; }
   uint64_t getSize() const override { return unwindInfoSize; }
-  virtual void addSymbol(const Defined *) = 0;
-  std::vector<ConcatInputSection *> getInputs() {
-    return compactUnwindSection->inputs;
-  }
+  void addSymbol(const Defined *);
   void prepareRelocations();
 
 protected:
   UnwindInfoSection();
   virtual void prepareRelocations(ConcatInputSection *) = 0;
 
-  ConcatOutputSection *compactUnwindSection;
+  llvm::MapVector<std::pair<const InputSection *, uint64_t /*Defined::value*/>,
+                  const Defined *>
+      symbols;
   uint64_t unwindInfoSize = 0;
   bool allEntriesAreOmitted = true;
 };

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index a53b2b68f4bd..27ad7b2802ba 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -677,7 +677,8 @@ void Writer::scanSymbols() {
         continue;
       if (defined->overridesWeakDef)
         in.weakBinding->addNonWeakDefinition(defined);
-      in.unwindInfo->addSymbol(defined);
+      if (!defined->isAbsolute() && isCodeSection(defined->isec))
+        in.unwindInfo->addSymbol(defined);
     } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
       // This branch intentionally doesn't check isLive().
       if (dysym->isDynamicLookup())
@@ -691,7 +692,8 @@ void Writer::scanSymbols() {
     if (auto *objFile = dyn_cast<ObjFile>(file))
       for (Symbol *sym : objFile->symbols) {
         if (auto *defined = dyn_cast_or_null<Defined>(sym))
-          if (!defined->isExternal() && defined->isLive())
+          if (!defined->isExternal() && defined->isLive() &&
+              !defined->isAbsolute() && isCodeSection(defined->isec))
             in.unwindInfo->addSymbol(defined);
       }
   }


        


More information about the llvm-commits mailing list