[lld] 2a3a79c - [lld-macho][NFC] Preserve original symbol isec, unwindEntry and size (#88357)

via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 18 11:42:26 PDT 2024


Author: alx32
Date: 2024-04-18T11:42:22-07:00
New Revision: 2a3a79ce4c2149d7787d56f9841b66cacc9061d0

URL: https://github.com/llvm/llvm-project/commit/2a3a79ce4c2149d7787d56f9841b66cacc9061d0
DIFF: https://github.com/llvm/llvm-project/commit/2a3a79ce4c2149d7787d56f9841b66cacc9061d0.diff

LOG: [lld-macho][NFC] Preserve original symbol isec, unwindEntry and size (#88357)

Currently, when moving symbols from one `InputSection` to another (like
in ICF) we directly update the symbol's `isec`, `unwindEntry` and
`size`. By doing this we lose the original information. This information
will be needed in a future change. Since when moving symbols we always
set the symbol's `wasCoalesced` and `isec->replacement`, we can just
use this info to conditionally get the information we need at access
time.

Added: 
    

Modified: 
    lld/MachO/ICF.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/MapFile.cpp
    lld/MachO/MarkLive.cpp
    lld/MachO/ObjC.cpp
    lld/MachO/Relocations.cpp
    lld/MachO/SectionPriorities.cpp
    lld/MachO/SymbolTable.cpp
    lld/MachO/Symbols.cpp
    lld/MachO/Symbols.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/UnwindInfoSection.cpp
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 0278bf7c6751a2..fc786b571dc64f 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -133,13 +133,13 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
       assert(isa<Defined>(sa));
       const auto *da = cast<Defined>(sa);
       const auto *db = cast<Defined>(sb);
-      if (!da->isec || !db->isec) {
+      if (!da->isec() || !db->isec()) {
         assert(da->isAbsolute() && db->isAbsolute());
         return da->value + ra.addend == db->value + rb.addend;
       }
-      isecA = da->isec;
+      isecA = da->isec();
       valueA = da->value;
-      isecB = db->isec;
+      isecB = db->isec();
       valueB = db->value;
     } else {
       isecA = ra.referent.get<InputSection *>();
@@ -191,10 +191,10 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
       const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
       if (da->isAbsolute())
         return true;
-      isecA = dyn_cast<ConcatInputSection>(da->isec);
+      isecA = dyn_cast<ConcatInputSection>(da->isec());
       if (!isecA)
         return true; // literal sections were checked in equalsConstant.
-      isecB = cast<ConcatInputSection>(db->isec);
+      isecB = cast<ConcatInputSection>(db->isec());
     } else {
       const auto *sa = ra.referent.get<InputSection *>();
       const auto *sb = rb.referent.get<InputSection *>();
@@ -212,7 +212,7 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
   // info matches. For simplicity, we only handle the case where there are only
   // symbols at offset zero within the section (which is typically the case with
   // .subsections_via_symbols.)
-  auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
+  auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
   const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
   const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
   if (itA == ia->symbols.end())
@@ -221,8 +221,8 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
     return false;
   const Defined *da = *itA;
   const Defined *db = *itB;
-  if (da->unwindEntry->icfEqClass[icfPass % 2] !=
-          db->unwindEntry->icfEqClass[icfPass % 2] ||
+  if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
+          db->unwindEntry()->icfEqClass[icfPass % 2] ||
       da->value != 0 || db->value != 0)
     return false;
   auto isZero = [](Defined *d) { return d->value == 0; };
@@ -289,13 +289,13 @@ void ICF::run() {
       for (const Reloc &r : isec->relocs) {
         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
           if (auto *defined = dyn_cast<Defined>(sym)) {
-            if (defined->isec) {
+            if (defined->isec()) {
               if (auto *referentIsec =
-                      dyn_cast<ConcatInputSection>(defined->isec))
+                      dyn_cast<ConcatInputSection>(defined->isec()))
                 hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
               else
-                hash += defined->isec->kind() +
-                        defined->isec->getOffset(defined->value);
+                hash += defined->isec()->kind() +
+                        defined->isec()->getOffset(defined->value);
             } else {
               hash += defined->value;
             }
@@ -368,8 +368,8 @@ void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
 
 void macho::markSymAsAddrSig(Symbol *s) {
   if (auto *d = dyn_cast_or_null<Defined>(s))
-    if (d->isec)
-      d->isec->keepUnique = true;
+    if (d->isec())
+      d->isec()->keepUnique = true;
 }
 
 void macho::markAddrSigSymbols() {
@@ -430,8 +430,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
     if (isFoldable) {
       foldable.push_back(isec);
       for (Defined *d : isec->symbols)
-        if (d->unwindEntry)
-          foldable.push_back(d->unwindEntry);
+        if (d->unwindEntry())
+          foldable.push_back(d->unwindEntry());
 
       // Some sections have embedded addends that foil ICF's hashing / equality
       // checks. (We can ignore embedded addends when doing ICF because the same

diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index b36d390cc16ade..8d66b37534f47e 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1170,7 +1170,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
           continue;
         }
         add += sym->value;
-        referentIsec = cast<ConcatInputSection>(sym->isec);
+        referentIsec = cast<ConcatInputSection>(sym->isec());
       } else {
         referentIsec =
             cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
@@ -1191,7 +1191,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
         ++it;
         continue;
       }
-      d->unwindEntry = isec;
+      d->originalUnwindEntry = isec;
       // Now that the symbol points to the unwind entry, we can remove the reloc
       // that points from the unwind entry back to the symbol.
       //
@@ -1348,7 +1348,7 @@ targetSymFromCanonicalSubtractor(const InputSection *isec,
   }
   if (Invert)
     std::swap(pcSym, target);
-  if (pcSym->isec == isec) {
+  if (pcSym->isec() == isec) {
     if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
       fatal("invalid FDE relocation in __eh_frame");
   } else {
@@ -1420,7 +1420,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
       // We already have an explicit relocation for the CIE offset.
       cieIsec =
           targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
-              ->isec;
+              ->isec();
       dataOff += sizeof(uint32_t);
     } else {
       // If we haven't found a relocation, then the CIE offset is most likely
@@ -1480,15 +1480,15 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
       // to register the unwind entry under same symbol.
       // This is not particularly efficient, but we should run into this case
       // infrequently (only when handling the output of `ld -r`).
-      if (funcSym->isec)
-        funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
+      if (funcSym->isec())
+        funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec()),
                                      funcSym->value);
     } else {
       funcSym = findSymbolAtAddress(sections, funcAddr);
       ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
     }
     // The symbol has been coalesced, or already has a compact unwind entry.
-    if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
+    if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry()) {
       // We must prune unused FDEs for correctness, so we cannot rely on
       // -dead_strip being enabled.
       isec->live = false;
@@ -1497,7 +1497,8 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
 
     InputSection *lsdaIsec = nullptr;
     if (lsdaAddrRelocIt != isec->relocs.end()) {
-      lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
+      lsdaIsec =
+          targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec();
     } else if (lsdaAddrOpt) {
       uint64_t lsdaAddr = *lsdaAddrOpt;
       Section *sec = findContainingSection(sections, &lsdaAddr);
@@ -1507,7 +1508,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
     }
 
     fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
-    funcSym->unwindEntry = isec;
+    funcSym->originalUnwindEntry = isec;
     ehRelocator.commit();
   }
 

diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 5c1e07cd21b1fb..904701731684b3 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -194,10 +194,8 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
   copy->live = false;
   copy->wasCoalesced = true;
   copy->replacement = this;
-  for (auto &copySym : copy->symbols) {
+  for (auto &copySym : copy->symbols)
     copySym->wasIdenticalCodeFolded = true;
-    copySym->size = 0;
-  }
 
   symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
   copy->symbols.clear();
@@ -207,7 +205,7 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
     return;
   for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {
     assert((*it)->value == 0);
-    (*it)->unwindEntry = nullptr;
+    (*it)->originalUnwindEntry = nullptr;
   }
 }
 

diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 2a31a5c09cdd22..5bcaeca48da2a2 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -77,8 +77,8 @@ static MapInfo gatherMapInfo() {
           // Only emit the prevailing definition of a symbol. Also, don't emit
           // the symbol if it is part of a cstring section (we use the literal
           // value instead, similar to ld64)
-          if (d->isec && d->getFile() == file &&
-              !isa<CStringInputSection>(d->isec)) {
+          if (d->isec() && d->getFile() == file &&
+              !isa<CStringInputSection>(d->isec())) {
             isReferencedFile = true;
             if (!d->isLive())
               info.deadSymbols.push_back(d);
@@ -155,6 +155,12 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
                  target->wordSize, sym->getName().str().data());
 }
 
+static uint64_t getSymSizeForMap(Defined *sym) {
+  if (sym->wasIdenticalCodeFolded)
+    return 0;
+  return sym->size;
+}
+
 void macho::writeMapFile() {
   if (config->mapFile.empty())
     return;
@@ -201,9 +207,10 @@ void macho::writeMapFile() {
   auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
     for (const ConcatInputSection *isec : arr) {
       for (Defined *sym : isec->symbols) {
-        if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
+        if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
           os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
-                       sym->size, readerToFileOrdinal[sym->getFile()],
+                       getSymSizeForMap(sym),
+                       readerToFileOrdinal[sym->getFile()],
                        sym->getName().str().data());
       }
     }
@@ -255,7 +262,7 @@ void macho::writeMapFile() {
     os << "#        \tSize    \tFile  Name\n";
     for (Defined *sym : info.deadSymbols) {
       assert(!sym->isLive());
-      os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
+      os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym),
                    readerToFileOrdinal[sym->getFile()],
                    sym->getName().str().data());
     }

diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index a37213d5613afb..c26c3aa321197e 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -110,10 +110,10 @@ void MarkLiveImpl<RecordWhyLive>::addSym(
     if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
       printWhyLive(s, prev);
   if (auto *d = dyn_cast<Defined>(s)) {
-    if (d->isec)
-      enqueue(d->isec, d->value, prev);
-    if (d->unwindEntry)
-      enqueue(d->unwindEntry, 0, prev);
+    if (d->isec())
+      enqueue(d->isec(), d->value, prev);
+    if (d->unwindEntry())
+      enqueue(d->unwindEntry(), 0, prev);
   }
 }
 
@@ -179,7 +179,7 @@ void MarkLiveImpl<RecordWhyLive>::markTransitively() {
           if (s->isLive()) {
             InputSection *referentIsec = nullptr;
             if (auto *d = dyn_cast<Defined>(s))
-              referentIsec = d->isec;
+              referentIsec = d->isec();
             enqueue(isec, 0, makeEntry(referentIsec, nullptr));
           }
         } else {

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index c28f2b42a72ce8..95fe0c9374f150 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -191,8 +191,8 @@ static StringRef getReferentString(const Reloc &r) {
   if (auto *isec = r.referent.dyn_cast<InputSection *>())
     return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
   auto *sym = cast<Defined>(r.referent.get<Symbol *>());
-  return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(sym->value +
-                                                                    r.addend);
+  return cast<CStringInputSection>(sym->isec())
+      ->getStringRefAtOffset(sym->value + r.addend);
 }
 
 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
@@ -306,7 +306,7 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
     return nullptr;
   };
 
-  const auto *classIsec = cast<ConcatInputSection>(classSym->isec);
+  const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
 
   // Parse instance methods.
   if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
@@ -314,7 +314,7 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
                  MK_Instance);
 
   // Class methods are contained in the metaclass.
-  if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset))
+  if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
     if (const auto *classMethodsIsec = getMethodsIsec(
             cast<ConcatInputSection>(r->getReferentInputSection())))
       parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
@@ -561,9 +561,9 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
   if (!sym)
     return;
 
-  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec))
+  if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
     eraseISec(cisec);
-  else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
+  else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
     uint32_t totalOffset = sym->value + reloc->addend;
     StringPiece &piece = csisec->getStringPiece(totalOffset);
     piece.live = false;
@@ -588,7 +588,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
     assert(catNameSym && "Category does not have a valid name Symbol");
 
     collectSectionWriteInfoFromIsec<CStringSection>(
-        catNameSym->isec, infoCategoryWriter.catNameInfo);
+        catNameSym->isec(), infoCategoryWriter.catNameInfo);
   }
 
   // Collect writer info from all the category lists (we're assuming they all
@@ -599,7 +599,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
       if (Defined *ptrList =
               tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
         collectSectionWriteInfoFromIsec<ConcatOutputSection>(
-            ptrList->isec, infoCategoryWriter.catPtrListInfo);
+            ptrList->isec(), infoCategoryWriter.catPtrListInfo);
         // we've successfully collected data, so we can break
         break;
       }
@@ -627,7 +627,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
   // platform pointer size, but to simplify implementation we always just read
   // the lower 32b which should be good enough.
   uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
-      ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+      ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
 
   ptrList.structCount += protocolCount;
   ptrList.structSize = target->wordSize;
@@ -636,7 +636,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
       (protocolCount * target->wordSize) +
       /*header(count)*/ protocolListHeaderLayout.totalSize +
       /*extra null value*/ target->wordSize;
-  assert(expectedListSize == ptrListSym->isec->data.size() &&
+  assert(expectedListSize == ptrListSym->isec()->data.size() &&
          "Protocol list does not match expected size");
 
   // Suppress unsuded var warning
@@ -644,7 +644,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
 
   uint32_t off = protocolListHeaderLayout.totalSize;
   for (uint32_t inx = 0; inx < protocolCount; ++inx) {
-    const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+    const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
     assert(reloc && "No reloc found at protocol list offset");
 
     auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -653,7 +653,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
     ptrList.allPtrs.push_back(listSym);
     off += target->wordSize;
   }
-  assert((ptrListSym->isec->getRelocAt(off) == nullptr) &&
+  assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
          "expected null terminating protocol");
   assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
          "Protocol list end offset does not match expected size");
@@ -678,9 +678,9 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
   assert(ptrListSym && "Reloc does not have a valid Defined");
 
   uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
-      ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+      ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
   uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
-      ptrListSym->isec->data.data() + listHeaderLayout.structCountOffset);
+      ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
   assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
 
   assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
@@ -690,12 +690,12 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
 
   uint32_t expectedListSize =
       listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
-  assert(expectedListSize == ptrListSym->isec->data.size() &&
+  assert(expectedListSize == ptrListSym->isec()->data.size() &&
          "Pointer list does not match expected size");
 
   for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
        off += target->wordSize) {
-    const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+    const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
     assert(reloc && "No reloc found at pointer list offset");
 
     auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -1054,7 +1054,7 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
   r.offset = offset;
   r.addend = 0;
   r.referent = const_cast<Symbol *>(refTo);
-  refFrom->isec->relocs.push_back(r);
+  refFrom->isec()->relocs.push_back(r);
 }
 
 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
@@ -1076,7 +1076,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
       if (!categorySym->getName().starts_with(objc::symbol_names::category))
         continue;
 
-      auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec);
+      auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
       assert(catBodyIsec &&
              "Category data section is not an ConcatInputSection");
 

diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index 4e840c6912cc57..afe7f454e6a230 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -24,7 +24,7 @@ static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24,
 InputSection *Reloc::getReferentInputSection() const {
   if (const auto *sym = referent.dyn_cast<Symbol *>()) {
     if (const auto *d = dyn_cast<Defined>(sym))
-      return d->isec;
+      return d->isec();
     return nullptr;
   } else {
     return referent.get<InputSection *>();

diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 976ea03980e963..907aee29d2386f 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -236,7 +236,7 @@ DenseMap<const InputSection *, size_t> CallGraphSort::run() {
         // section.
         for (Symbol *sym : isec->getFile()->symbols) {
           if (auto *d = dyn_cast_or_null<Defined>(sym)) {
-            if (d->isec == isec)
+            if (d->isec() == isec)
               os << sym->getName() << "\n";
           }
         }
@@ -258,7 +258,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
   if (it == priorities.end())
     return std::nullopt;
   const SymbolPriorityEntry &entry = it->second;
-  const InputFile *f = sym->isec->getFile();
+  const InputFile *f = sym->isec()->getFile();
   if (!f)
     return entry.anyObjectFile;
   // We don't use toString(InputFile *) here because it returns the full path
@@ -287,7 +287,7 @@ void macho::PriorityBuilder::extractCallGraphProfile() {
       if (fromSym && toSym &&
           (!hasOrderFile ||
            (!getSymbolPriority(fromSym) && !getSymbolPriority(toSym))))
-        callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
+        callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count;
     }
   }
 }
@@ -370,7 +370,7 @@ macho::PriorityBuilder::buildInputSectionPriorities() {
     std::optional<size_t> symbolPriority = getSymbolPriority(sym);
     if (!symbolPriority)
       return;
-    size_t &priority = sectionPriorities[sym->isec];
+    size_t &priority = sectionPriorities[sym->isec()];
     priority = std::max(priority, *symbolPriority);
   };
 

diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp
index 755ff270e2f7a9..f0a92da8777e13 100644
--- a/lld/MachO/SymbolTable.cpp
+++ b/lld/MachO/SymbolTable.cpp
@@ -80,14 +80,14 @@ static void transplantSymbolsAtOffset(InputSection *fromIsec,
       // iterator. However, that is typically the case for files that have
       // .subsections_via_symbols set.
       insertIt = toIsec->symbols.insert(insertIt, d);
-      d->isec = toIsec;
+      d->originalIsec = toIsec;
       d->value = toOff;
       // We don't want to have more than one unwindEntry at a given address, so
       // drop the redundant ones. We We can safely drop the unwindEntries of
       // the symbols in fromIsec since we will be adding another unwindEntry as
       // we finish parsing toIsec's file. (We can assume that toIsec has its
       // own unwindEntry because of the ODR.)
-      d->unwindEntry = nullptr;
+      d->originalUnwindEntry = nullptr;
     }
     return true;
   });
@@ -121,8 +121,8 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
           // in ObjFile::parseSymbols() such that extern weak symbols appear
           // last, so we don't need to worry about subsequent symbols being
           // added to an already-coalesced section.
-          if (defined->isec)
-            transplantSymbolsAtOffset(concatIsec, defined->isec,
+          if (defined->isec())
+            transplantSymbolsAtOffset(concatIsec, defined->isec(),
                                       /*skip=*/nullptr, value, defined->value);
         }
         return defined;
@@ -130,7 +130,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
 
       if (defined->isWeakDef()) {
         if (auto concatIsec =
-                dyn_cast_or_null<ConcatInputSection>(defined->isec)) {
+                dyn_cast_or_null<ConcatInputSection>(defined->isec())) {
           concatIsec->wasCoalesced = true;
           if (isec)
             transplantSymbolsAtOffset(concatIsec, isec, defined, defined->value,
@@ -212,7 +212,7 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file,
 Defined *SymbolTable::aliasDefined(Defined *src, StringRef target,
                                    InputFile *newFile, bool makePrivateExtern) {
   bool isPrivateExtern = makePrivateExtern || src->privateExtern;
-  return addDefined(target, newFile, src->isec, src->value, src->size,
+  return addDefined(target, newFile, src->isec(), src->value, src->size,
                     src->isWeakDef(), isPrivateExtern,
                     src->referencedDynamically, src->noDeadStrip,
                     src->weakDefCanBeHidden);

diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 4428bd4d9dc909..7bac78a59e4fec 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -63,8 +63,8 @@ Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
       wasIdenticalCodeFolded(false),
       referencedDynamically(isReferencedDynamically), noDeadStrip(noDeadStrip),
       interposable(interposable), weakDefCanBeHidden(isWeakDefCanBeHidden),
-      weakDef(isWeakDef), external(isExternal), isec(isec), value(value),
-      size(size) {
+      weakDef(isWeakDef), external(isExternal), originalIsec(isec),
+      value(value), size(size) {
   if (isec) {
     isec->symbols.push_back(this);
     // Maintain sorted order.
@@ -82,7 +82,7 @@ Defined::Defined(StringRefZ name, InputFile *file, InputSection *isec,
 }
 
 bool Defined::isTlv() const {
-  return !isAbsolute() && isThreadLocalVariables(isec->getFlags());
+  return !isAbsolute() && isThreadLocalVariables(originalIsec->getFlags());
 }
 
 uint64_t Defined::getVA() const {
@@ -91,7 +91,7 @@ uint64_t Defined::getVA() const {
   if (isAbsolute())
     return value;
 
-  if (!isec->isFinal) {
+  if (!isec()->isFinal) {
     // A target arch that does not use thunks ought never ask for
     // the address of a function that has not yet been finalized.
     assert(target->usesThunks());
@@ -102,24 +102,28 @@ uint64_t Defined::getVA() const {
     // expedient to return a contrived out-of-range address.
     return TargetInfo::outOfRangeVA;
   }
-  return isec->getVA(value);
+  return isec()->getVA(value);
 }
 
 ObjFile *Defined::getObjectFile() const {
-  return isec ? dyn_cast_or_null<ObjFile>(isec->getFile()) : nullptr;
-}
-
-void Defined::canonicalize() {
-  if (unwindEntry)
-    unwindEntry = unwindEntry->canonical();
-  if (isec)
-    isec = isec->canonical();
+  return originalIsec ? dyn_cast_or_null<ObjFile>(originalIsec->getFile())
+                      : nullptr;
 }
 
 std::string Defined::getSourceLocation() {
-  if (!isec)
+  if (!originalIsec)
     return {};
-  return isec->getSourceLocation(value);
+  return originalIsec->getSourceLocation(value);
+}
+
+// Get the canonical InputSection of the symbol.
+InputSection *Defined::isec() const {
+  return originalIsec ? originalIsec->canonical() : nullptr;
+}
+
+// Get the canonical unwind entry of the symbol.
+ConcatInputSection *Defined::unwindEntry() const {
+  return originalUnwindEntry ? originalUnwindEntry->canonical() : nullptr;
 }
 
 uint64_t DylibSymbol::getVA() const {

diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index f30294dbea9cb2..be5d135b8cd54f 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -129,7 +129,7 @@ class Defined : public Symbol {
   bool isTlv() const override;
 
   bool isExternal() const { return external; }
-  bool isAbsolute() const { return isec == nullptr; }
+  bool isAbsolute() const { return originalIsec == nullptr; }
 
   uint64_t getVA() const override;
 
@@ -139,9 +139,11 @@ class Defined : public Symbol {
 
   std::string getSourceLocation();
 
-  // Ensure this symbol's pointers to InputSections point to their canonical
-  // copies.
-  void canonicalize();
+  // Get the canonical InputSection of the symbol.
+  InputSection *isec() const;
+
+  // Get the canonical unwind entry of the symbol.
+  ConcatInputSection *unwindEntry() const;
 
   static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
 
@@ -182,14 +184,17 @@ class Defined : public Symbol {
   const bool external : 1;
 
 public:
-  InputSection *isec;
+  // The native InputSection of the symbol. The symbol may be moved to another
+  // InputSection in which case originalIsec->canonical() will point to the new
+  // InputSection
+  InputSection *originalIsec;
   // Contains the offset from the containing subsection. Note that this is
   // different from nlist::n_value, which is the absolute address of the symbol.
   uint64_t value;
   // size is only calculated for regular (non-bitcode) symbols.
   uint64_t size;
   // This can be a subsection of either __compact_unwind or __eh_frame.
-  ConcatInputSection *unwindEntry = nullptr;
+  ConcatInputSection *originalUnwindEntry = nullptr;
 };
 
 // This enum does double-duty: as a symbol property, it indicates whether & how

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 6f6b66118b7a94..29070810bb049e 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -828,7 +828,7 @@ void ObjCSelRefsHelper::initialize() {
     auto Reloc = isec->relocs[0];
     if (const auto *sym = Reloc.referent.dyn_cast<Symbol *>()) {
       if (const auto *d = dyn_cast<Defined>(sym)) {
-        auto *cisec = cast<CStringInputSection>(d->isec);
+        auto *cisec = cast<CStringInputSection>(d->isec());
         auto methname = cisec->getStringRefAtOffset(d->value);
         methnameToSelref[CachedHashStringRef(methname)] = isec;
       }
@@ -1127,7 +1127,7 @@ void FunctionStartsSection::finalizeContents() {
     if (auto *objFile = dyn_cast<ObjFile>(file)) {
       for (const Symbol *sym : objFile->symbols) {
         if (const auto *defined = dyn_cast_or_null<Defined>(sym)) {
-          if (!defined->isec || !isCodeSection(defined->isec) ||
+          if (!defined->isec() || !isCodeSection(defined->isec()) ||
               !defined->isLive())
             continue;
           addrs.push_back(defined->getVA());
@@ -1228,7 +1228,7 @@ void SymtabSection::emitStabs() {
       if (!file || !file->compileUnit)
         continue;
 
-      symbolsNeedingStabs.emplace_back(defined, defined->isec->getFile()->id);
+      symbolsNeedingStabs.emplace_back(defined, defined->isec()->getFile()->id);
     }
   }
 
@@ -1243,7 +1243,7 @@ void SymtabSection::emitStabs() {
   InputFile *lastFile = nullptr;
   for (SortingPair &pair : symbolsNeedingStabs) {
     Defined *defined = pair.first;
-    InputSection *isec = defined->isec;
+    InputSection *isec = defined->isec();
     ObjFile *file = cast<ObjFile>(isec->getFile());
 
     if (lastFile == nullptr || lastFile != file) {
@@ -1256,7 +1256,7 @@ void SymtabSection::emitStabs() {
     }
 
     StabsEntry symStab;
-    symStab.sect = defined->isec->parent->index;
+    symStab.sect = defined->isec()->parent->index;
     symStab.strx = stringTableSection.addString(defined->getName());
     symStab.value = defined->getVA();
 
@@ -1407,7 +1407,7 @@ template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t *buf) const {
         nList->n_value = defined->value;
       } else {
         nList->n_type = scope | N_SECT;
-        nList->n_sect = defined->isec->parent->index;
+        nList->n_sect = defined->isec()->parent->index;
         // For the N_SECT symbol type, n_value is the address of the symbol
         nList->n_value = defined->getVA();
       }
@@ -2000,7 +2000,7 @@ void ObjCMethListSection::setUp() {
       assert(reloc && "Relocation expected at method list name slot");
       auto *def = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
       assert(def && "Expected valid Defined at method list name slot");
-      auto *cisec = cast<CStringInputSection>(def->isec);
+      auto *cisec = cast<CStringInputSection>(def->isec());
       assert(cisec && "Expected method name to be in a CStringInputSection");
       auto methname = cisec->getStringRefAtOffset(def->value);
       if (!ObjCSelRefsHelper::getSelRef(methname))
@@ -2107,7 +2107,7 @@ void ObjCMethListSection::writeRelativeOffsetForIsec(
   uint32_t symVA = def->getVA();
 
   if (useSelRef) {
-    auto *cisec = cast<CStringInputSection>(def->isec);
+    auto *cisec = cast<CStringInputSection>(def->isec());
     auto methname = cisec->getStringRefAtOffset(def->value);
     ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
     assert(selRef && "Expected all selector names to already be already be "

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 1466e7b4fb394f..0ac2f39a6180c7 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -185,16 +185,16 @@ UnwindInfoSection::UnwindInfoSection()
 // function symbols for each unique address regardless of whether they have
 // associated unwind info.
 void UnwindInfoSection::addSymbol(const Defined *d) {
-  if (d->unwindEntry)
+  if (d->unwindEntry())
     allEntriesAreOmitted = false;
   // We don't yet know the final output address of this symbol, but we know that
   // they are uniquely determined by a combination of the isec and value, so
   // we use that as the key here.
-  auto p = symbols.insert({{d->isec, d->value}, d});
+  auto p = symbols.insert({{d->isec(), d->value}, d});
   // If we have multiple symbols at the same address, only one of them can have
   // an associated unwind entry.
-  if (!p.second && d->unwindEntry) {
-    assert(p.first->second == d || !p.first->second->unwindEntry);
+  if (!p.second && d->unwindEntry()) {
+    assert(p.first->second == d || !p.first->second->unwindEntry());
     p.first->second = d;
   }
 }
@@ -204,16 +204,16 @@ void UnwindInfoSectionImpl::prepare() {
   // entries to the GOT. Hence the use of a MapVector for
   // UnwindInfoSection::symbols.
   for (const Defined *d : make_second_range(symbols))
-    if (d->unwindEntry) {
-      if (d->unwindEntry->getName() == section_names::compactUnwind) {
-        prepareRelocations(d->unwindEntry);
+    if (d->unwindEntry()) {
+      if (d->unwindEntry()->getName() == section_names::compactUnwind) {
+        prepareRelocations(d->unwindEntry());
       } else {
         // We don't have to add entries to the GOT here because FDEs have
         // explicit GOT relocations, so Writer::scanRelocations() will add those
         // GOT entries. However, we still need to canonicalize the personality
         // pointers (like prepareRelocations() does for CU entries) in order
         // to avoid overflowing the 3-personality limit.
-        FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+        FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
         fde.personality = canonicalizePersonality(fde.personality);
       }
     }
@@ -279,7 +279,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
       if (auto *defined = dyn_cast<Defined>(s)) {
         // Check if we have created a synthetic symbol at the same address.
         Symbol *&personality =
-            personalityTable[{defined->isec, defined->value}];
+            personalityTable[{defined->isec(), defined->value}];
         if (personality == nullptr) {
           personality = defined;
           in.got->addEntry(defined);
@@ -321,7 +321,7 @@ void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
 Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
   if (auto *defined = dyn_cast_or_null<Defined>(personality)) {
     // Check if we have created a synthetic symbol at the same address.
-    Symbol *&synth = personalityTable[{defined->isec, defined->value}];
+    Symbol *&synth = personalityTable[{defined->isec(), defined->value}];
     if (synth == nullptr)
       synth = defined;
     else if (synth != defined)
@@ -340,12 +340,12 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
     CompactUnwindEntry &cu = cuEntries[i];
     const Defined *d = symbolsVec[i].second;
     cu.functionAddress = d->getVA();
-    if (!d->unwindEntry)
+    if (!d->unwindEntry())
       return;
 
     // If we have DWARF unwind info, create a slimmed-down CU entry that points
     // to it.
-    if (d->unwindEntry->getName() == section_names::ehFrame) {
+    if (d->unwindEntry()->getName() == section_names::ehFrame) {
       // The unwinder will look for the DWARF entry starting at the hint,
       // assuming the hint points to a valid CFI record start. If it
       // fails to find the record, it proceeds in a linear search through the
@@ -355,11 +355,11 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
       // but since we don't keep track of that, just encode zero -- the start of
       // the section is always the start of a CFI record.
       uint64_t dwarfOffsetHint =
-          d->unwindEntry->outSecOff <= DWARF_SECTION_OFFSET
-              ? d->unwindEntry->outSecOff
+          d->unwindEntry()->outSecOff <= DWARF_SECTION_OFFSET
+              ? d->unwindEntry()->outSecOff
               : 0;
       cu.encoding = target->modeDwarfEncoding | dwarfOffsetHint;
-      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry()];
       cu.functionLength = fde.funcLength;
       // Omit the DWARF personality from compact-unwind entry so that we
       // don't need to encode it.
@@ -368,14 +368,15 @@ void UnwindInfoSectionImpl::relocateCompactUnwind(
       return;
     }
 
-    assert(d->unwindEntry->getName() == section_names::compactUnwind);
+    assert(d->unwindEntry()->getName() == section_names::compactUnwind);
 
-    auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
-               target->wordSize;
+    auto buf =
+        reinterpret_cast<const uint8_t *>(d->unwindEntry()->data.data()) -
+        target->wordSize;
     cu.functionLength =
         support::endian::read32le(buf + cuLayout.functionLengthOffset);
     cu.encoding = support::endian::read32le(buf + cuLayout.encodingOffset);
-    for (const Reloc &r : d->unwindEntry->relocs) {
+    for (const Reloc &r : d->unwindEntry()->relocs) {
       if (r.offset == cuLayout.personalityOffset)
         cu.personality = r.referent.get<Symbol *>();
       else if (r.offset == cuLayout.lsdaOffset)

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 1c054912551e3e..b9fcb45ef86b27 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -725,10 +725,9 @@ void Writer::scanSymbols() {
     if (auto *defined = dyn_cast<Defined>(sym)) {
       if (!defined->isLive())
         continue;
-      defined->canonicalize();
       if (defined->overridesWeakDef)
         addNonWeakDefinition(defined);
-      if (!defined->isAbsolute() && isCodeSection(defined->isec))
+      if (!defined->isAbsolute() && isCodeSection(defined->isec()))
         in.unwindInfo->addSymbol(defined);
     } else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
       // This branch intentionally doesn't check isLive().
@@ -756,9 +755,8 @@ void Writer::scanSymbols() {
         if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
           if (!defined->isLive())
             continue;
-          defined->canonicalize();
           if (!defined->isExternal() && !defined->isAbsolute() &&
-              isCodeSection(defined->isec))
+              isCodeSection(defined->isec()))
             in.unwindInfo->addSymbol(defined);
         }
       }


        


More information about the llvm-commits mailing list