[lld] 8aa17d1 - [lld-macho] Move ICF members from InputSection to ConcatInputSection

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 24 19:23:22 PDT 2021


Author: Jez Ng
Date: 2021-06-24T22:23:12-04:00
New Revision: 8aa17d1eaeb48f8782661680d69c28f2458959ad

URL: https://github.com/llvm/llvm-project/commit/8aa17d1eaeb48f8782661680d69c28f2458959ad
DIFF: https://github.com/llvm/llvm-project/commit/8aa17d1eaeb48f8782661680d69c28f2458959ad.diff

LOG: [lld-macho] Move ICF members from InputSection to ConcatInputSection

`icfEqClass` only makes sense on ConcatInputSections since (in contrast
to literal sections) they are deduplicated as an atomic unit.

Similarly, `hasPersonality` and `replacement` don't make sense on
literal sections.

This mirrors LLD-ELF, which stores `icfEqClass` only on non-mergeable
sections.

Reviewed By: #lld-macho, gkm

Differential Revision: https://reviews.llvm.org/D104670

Added: 
    

Modified: 
    lld/MachO/ICF.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/UnwindInfoSection.cpp
    lld/MachO/Writer.cpp

Removed: 
    


################################################################################
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 9218fb6baa30..ce49dc903d4b 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -108,10 +108,19 @@ static bool equalsVariable(const ConcatInputSection *ia,
           return false;
         if (da->isAbsolute() != db->isAbsolute())
           return false;
-        if (da->isec)
-          if (da->isec->icfEqClass[icfPass % 2] !=
-              db->isec->icfEqClass[icfPass % 2])
+        if (da->isec) {
+          if (da->isec->kind() != db->isec->kind())
             return false;
+          if (const auto *isecA = dyn_cast<ConcatInputSection>(da->isec)) {
+            const auto *isecB = cast<ConcatInputSection>(db->isec);
+            if (isecA->icfEqClass[icfPass % 2] !=
+                isecB->icfEqClass[icfPass % 2])
+              return false;
+          } else {
+            // FIXME: implement ICF for other InputSection kinds
+            return false;
+          }
+        }
       } else if (isa<DylibSymbol>(sa)) {
         // There is one DylibSymbol per gotIndex and we already checked for
         // symbol equality, thus we know that these must be different.
@@ -122,8 +131,16 @@ static bool equalsVariable(const ConcatInputSection *ia,
     } else {
       const auto *sa = ra.referent.get<InputSection *>();
       const auto *sb = rb.referent.get<InputSection *>();
-      if (sa->icfEqClass[icfPass % 2] != sb->icfEqClass[icfPass % 2])
+      if (sa->kind() != sb->kind())
         return false;
+      if (const auto *isecA = dyn_cast<ConcatInputSection>(sa)) {
+        const auto *isecB = cast<ConcatInputSection>(sb);
+        if (isecA->icfEqClass[icfPass % 2] != isecB->icfEqClass[icfPass % 2])
+          return false;
+      } else {
+        // FIXME: implement ICF for other InputSection kinds
+        return false;
+      }
     }
     return true;
   };
@@ -183,17 +200,19 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
 void ICF::run() {
   // Into each origin-section hash, combine all reloc referent section hashes.
   for (icfPass = 0; icfPass < 2; ++icfPass) {
-    parallelForEach(icfInputs, [&](InputSection *isec) {
+    parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
       uint64_t hash = isec->icfEqClass[icfPass % 2];
       for (const Reloc &r : isec->relocs) {
         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
           if (auto *dylibSym = dyn_cast<DylibSymbol>(sym))
             hash += dylibSym->stubsHelperIndex;
-          else if (auto *defined = dyn_cast<Defined>(sym))
-            hash +=
-                defined->value +
-                (defined->isec ? defined->isec->icfEqClass[icfPass % 2] : 0);
-          else
+          else if (auto *defined = dyn_cast<Defined>(sym)) {
+            hash += defined->value;
+            if (defined->isec)
+              if (auto *isec = cast<ConcatInputSection>(defined->isec))
+                hash += isec->icfEqClass[icfPass % 2];
+            // FIXME: implement ICF for other InputSection kinds
+          } else
             llvm_unreachable("foldIdenticalSections symbol kind");
         }
       }
@@ -202,10 +221,10 @@ void ICF::run() {
     });
   }
 
-  llvm::stable_sort(icfInputs,
-                    [](const InputSection *a, const InputSection *b) {
-                      return a->icfEqClass[0] < b->icfEqClass[0];
-                    });
+  llvm::stable_sort(
+      icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
+        return a->icfEqClass[0] < b->icfEqClass[0];
+      });
   forEachClass(
       [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); });
 

diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index c656efc38024..78a7f00a18c5 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -47,10 +47,9 @@ static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
 
 // ICF needs to hash any section that might potentially be duplicated so
 // that it can match on content rather than identity.
-bool InputSection::isHashableForICF(bool isText) const {
-  if (auto const *concatIsec = dyn_cast<ConcatInputSection>(this))
-    if (concatIsec->shouldOmitFromOutput())
-      return false;
+bool ConcatInputSection::isHashableForICF(bool isText) const {
+  if (shouldOmitFromOutput())
+    return false;
   switch (sectionType(flags)) {
   case S_REGULAR:
     if (isText)
@@ -63,10 +62,9 @@ bool InputSection::isHashableForICF(bool isText) const {
   case S_8BYTE_LITERALS:
   case S_16BYTE_LITERALS:
   case S_LITERAL_POINTERS:
-    // FIXME(gkm): once literal sections are deduplicated, their content and
-    // identity correlate, so we can assign unique IDs to them rather than hash
-    // them.
-    return true;
+    // FIXME(jezng): We should not have any ConcatInputSections of these types
+    // when running ICF.
+    return false;
   case S_ZEROFILL:
   case S_GB_ZEROFILL:
   case S_NON_LAZY_SYMBOL_POINTERS:
@@ -89,7 +87,7 @@ bool InputSection::isHashableForICF(bool isText) const {
   }
 }
 
-void InputSection::hashForICF() {
+void ConcatInputSection::hashForICF() {
   assert(data.data()); // zeroFill section data has nullptr with non-zero size
   assert(icfEqClass[0] == 0); // don't overwrite a unique ID!
   // Turn-on the top bit to guarantee that valid hashes have no collisions

diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index d713b6bbe0d7..3dd31d27be91 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -45,6 +45,7 @@ class InputSection {
   // Whether the data at \p off in this InputSection is live.
   virtual bool isLive(uint64_t off) const = 0;
   virtual void markLive(uint64_t off) = 0;
+  virtual InputSection *canonical() { return this; }
 
   InputFile *file = nullptr;
   StringRef name;
@@ -59,17 +60,6 @@ class InputSection {
   // is address assigned?
   bool isFinal = false;
 
-  bool isHashableForICF(bool isText) const;
-  void hashForICF();
-  InputSection *canonical() { return replacement ? replacement : this; }
-
-  // ICF can't fold functions with LSDA+personality
-  bool hasPersonality = false;
-  // Points to the surviving section after this one is folded by ICF
-  InputSection *replacement = nullptr;
-  // Equivalence-class ID for ICF
-  uint64_t icfEqClass[2] = {0, 0};
-
   ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
 
@@ -105,13 +95,25 @@ class ConcatInputSection final : public InputSection {
   void markLive(uint64_t off) override { live = true; }
   bool isCoalescedWeak() const { return wasCoalesced && numRefs == 0; }
   bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
+  bool isHashableForICF(bool isText) const;
+  void hashForICF();
   void writeTo(uint8_t *buf);
 
+  void foldIdentical(ConcatInputSection *redundant);
+  InputSection *canonical() override {
+    return replacement ? replacement : this;
+  }
+
   static bool classof(const InputSection *isec) {
     return isec->kind() == ConcatKind;
   }
 
-  void foldIdentical(ConcatInputSection *redundant);
+  // ICF can't fold functions with LSDA+personality
+  bool hasPersonality = false;
+  // Points to the surviving section after this one is folded by ICF
+  InputSection *replacement = nullptr;
+  // Equivalence-class ID for ICF
+  uint64_t icfEqClass[2] = {0, 0};
 
   // With subsections_via_symbols, most symbols have their own InputSection,
   // and for weak symbols (e.g. from inline functions), only the

diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index f82395b9d03e..8ad71c04f866 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -153,7 +153,9 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
     Reloc &rFunc = isec->relocs[++i];
     assert(r.offset ==
            rFunc.offset + offsetof(CompactUnwindEntry<Ptr>, personality));
-    rFunc.referent.get<InputSection *>()->hasPersonality = true;
+    auto *referentIsec =
+        cast<ConcatInputSection>(rFunc.referent.get<InputSection *>());
+    referentIsec->hasPersonality = true;
 
     if (auto *s = r.referent.dyn_cast<Symbol *>()) {
       if (auto *undefined = dyn_cast<Undefined>(s)) {

diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 8df5ac1494fd..5dab4d1aa314 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -957,7 +957,7 @@ void Writer::foldIdenticalSections() {
   // relocs to find every referenced InputSection, but that precludes easy
   // parallelization. Therefore, we hash every InputSection here where we have
   // them all accessible as a simple vector.
-  std::vector<InputSection *> hashable;
+  std::vector<ConcatInputSection *> hashable;
   // If an InputSection is ineligible for ICF, we give it a unique ID to force
   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
   // space at inputSections.size(), so that it will never intersect with
@@ -967,12 +967,16 @@ void Writer::foldIdenticalSections() {
   // ICF::segregate()
   uint64_t icfUniqueID = inputSections.size();
   for (InputSection *isec : inputSections) {
-    if (isec->isHashableForICF(isec->parent == textOutputSection))
-      hashable.push_back(isec);
-    else
-      isec->icfEqClass[0] = ++icfUniqueID;
+    if (auto *concatIsec = dyn_cast<ConcatInputSection>(isec)) {
+      if (concatIsec->isHashableForICF(isec->parent == textOutputSection))
+        hashable.push_back(concatIsec);
+      else
+        concatIsec->icfEqClass[0] = ++icfUniqueID;
+    }
+    // FIXME: hash literal sections here?
   }
-  parallelForEach(hashable, [](InputSection *isec) { isec->hashForICF(); });
+  parallelForEach(hashable,
+                  [](ConcatInputSection *isec) { isec->hashForICF(); });
   // Now that every input section is either hashed or marked as unique,
   // run the segregation algorithm to detect foldable subsections
   ICF(textOutputSection->inputs).run();


        


More information about the llvm-commits mailing list