[lld] d9b6f7e - [lld-macho] Teach ICF to dedup functions with identical unwind info

Jez Ng via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 12 13:02:59 PST 2021


Author: Jez Ng
Date: 2021-11-12T16:02:49-05:00
New Revision: d9b6f7e312c11ff69779939bb2be0bd53936a333

URL: https://github.com/llvm/llvm-project/commit/d9b6f7e312c11ff69779939bb2be0bd53936a333
DIFF: https://github.com/llvm/llvm-project/commit/d9b6f7e312c11ff69779939bb2be0bd53936a333.diff

LOG: [lld-macho] Teach ICF to dedup functions with identical unwind info

Dedup'ing unwind info is tricky because each CUE contains a different
function address, if ICF operated naively and compared the entire
contents of each CUE, entries with identical unwind info but belonging
to different functions would never be considered identical. To work
around this problem, we slice away the function address before
performing ICF. We rely on `relocateCompactUnwind()` to correctly handle
these truncated input sections.

Here are the numbers before and after D109944, D109945, and this diff
were applied, as tested on my 3.2 GHz 16-Core Intel Xeon W:

Without any optimizations:

             base           diff           difference (95% CI)
  sys_time   0.849 ± 0.015  0.896 ± 0.012  [  +4.8% ..   +6.2%]
  user_time  3.357 ± 0.030  3.512 ± 0.023  [  +4.3% ..   +5.0%]
  wall_time  3.944 ± 0.039  4.032 ± 0.031  [  +1.8% ..   +2.6%]
  samples    40             38

With `-dead_strip`:

             base           diff           difference (95% CI)
  sys_time   0.847 ± 0.010  0.896 ± 0.012  [  +5.2% ..   +6.5%]
  user_time  3.377 ± 0.014  3.532 ± 0.015  [  +4.4% ..   +4.8%]
  wall_time  3.962 ± 0.024  4.060 ± 0.030  [  +2.1% ..   +2.8%]
  samples    47             30

With `-dead_strip` and `--icf=all`:

             base           diff           difference (95% CI)
  sys_time   0.935 ± 0.013  0.957 ± 0.018  [  +1.5% ..   +3.2%]
  user_time  3.472 ± 0.022  6.531 ± 0.046  [ +87.6% ..  +88.7%]
  wall_time  4.080 ± 0.040  5.329 ± 0.060  [ +30.0% ..  +31.2%]
  samples    37             30

Unsurprisingly, ICF is now a lot slower, likely due to the much larger
number of input sections it needs to process. But the rest of the
linker only suffers a mild slowdown.

Note that the compact-unwind-bad-reloc.s test was expanded because we
now handle the relocation for CUE's function address in a separate code
path from the rest of the CUE relocations. The extended test covers both
code paths.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D109946

Added: 
    

Modified: 
    lld/MachO/ICF.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/UnwindInfoSection.cpp
    lld/test/MachO/icf.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 57e1da3f9a7f..6c3a565e74a9 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -180,8 +180,31 @@ static bool equalsVariable(const ConcatInputSection *ia,
     }
     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
   };
-  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
-                    f);
+  if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
+    return false;
+
+  // If there are symbols with associated unwind info, check that the unwind
+  // info matches. For simplicity, we only handle the case where there are only
+  // symbols at offset zero within the section (which is typically the case with
+  // .subsections_via_symbols.)
+  auto hasCU = [](Defined *d) { return d->compactUnwind; };
+  auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasCU);
+  auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasCU);
+  if (itA == ia->symbols.end())
+    return itB == ib->symbols.end();
+  if (itB == ib->symbols.end())
+    return false;
+  const Defined *da = *itA;
+  const Defined *db = *itB;
+  if (da->compactUnwind->icfEqClass[icfPass % 2] !=
+          db->compactUnwind->icfEqClass[icfPass % 2] ||
+      da->value != 0 || db->value != 0)
+    return false;
+  auto isZero = [](Defined *d) { return d->value == 0; };
+  return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
+             ia->symbols.end() &&
+         std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
+             ib->symbols.end();
 }
 
 // Find the first InputSection after BEGIN whose equivalence class 
diff ers
@@ -337,18 +360,14 @@ void macho::foldIdenticalSections() {
     // FIXME: consider non-code __text sections as hashable?
     bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
                       !isec->shouldOmitFromOutput() && isec->isHashableForICF();
-    // ICF can't fold functions with unwind info
-    if (isHashable)
-      for (Defined *d : isec->symbols)
-        if (d->compactUnwind) {
-          isHashable = false;
-          break;
-        }
-
-    if (isHashable)
+    if (isHashable) {
       hashable.push_back(isec);
-    else
+      for (Defined *d : isec->symbols)
+        if (d->compactUnwind)
+          hashable.push_back(d->compactUnwind);
+    } else {
       isec->icfEqClass[0] = ++icfUniqueID;
+    }
   }
   parallelForEach(hashable,
                   [](ConcatInputSection *isec) { isec->hashForICF(); });

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 6e6a61e0d6d4..1b233920ffb2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -904,16 +904,31 @@ void ObjFile::registerCompactUnwind() {
 
   for (SubsectionEntry &entry : *cuSubsecMap) {
     ConcatInputSection *isec = cast<ConcatInputSection>(entry.isec);
+    // Hack!! Since each CUE contains a 
diff erent function address, if ICF
+    // operated naively and compared the entire contents of each CUE, entries
+    // with identical unwind info but belonging to 
diff erent functions would
+    // never be considered equivalent. To work around this problem, we slice
+    // away the function address here. (Note that we do not adjust the offsets
+    // of the corresponding relocations.) We rely on `relocateCompactUnwind()`
+    // to correctly handle these truncated input sections.
+    isec->data = isec->data.slice(target->wordSize);
+
     ConcatInputSection *referentIsec;
-    for (const Reloc &r : isec->relocs) {
-      if (r.offset != 0)
+    for (auto it = isec->relocs.begin(); it != isec->relocs.end();) {
+      Reloc &r = *it;
+      // We only wish to handle the relocation for CUE::functionAddress.
+      if (r.offset != 0) {
+        ++it;
         continue;
+      }
       uint64_t add = r.addend;
       if (auto *sym = cast_or_null<Defined>(r.referent.dyn_cast<Symbol *>())) {
         // Check whether the symbol defined in this file is the prevailing one.
         // Skip if it is e.g. a weak def that didn't prevail.
-        if (sym->getFile() != this)
+        if (sym->getFile() != this) {
+          ++it;
           continue;
+        }
         add += sym->value;
         referentIsec = cast<ConcatInputSection>(sym->isec);
       } else {
@@ -926,16 +941,23 @@ void ObjFile::registerCompactUnwind() {
       // The functionAddress relocations are typically section relocations.
       // However, unwind info operates on a per-symbol basis, so we search for
       // the function symbol here.
-      auto it = llvm::lower_bound(
+      auto symIt = llvm::lower_bound(
           referentIsec->symbols, add,
           [](Defined *d, uint64_t add) { return d->value < add; });
       // The relocation should point at the exact address of a symbol (with no
       // addend).
-      if (it == referentIsec->symbols.end() || (*it)->value != add) {
+      if (symIt == referentIsec->symbols.end() || (*symIt)->value != add) {
         assert(referentIsec->wasCoalesced);
+        ++it;
         continue;
       }
-      (*it)->compactUnwind = isec;
+      (*symIt)->compactUnwind = isec;
+      // Since we've sliced away the functionAddress, we should remove the
+      // corresponding relocation too. Given that clang emits relocations in
+      // reverse order of address, this relocation should be at the end of the
+      // vector for most of our input object files, so this is typically an O(1)
+      // operation.
+      it = isec->relocs.erase(it);
     }
   }
 }

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index c499826e1059..06f8d449feb5 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -295,7 +295,8 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
       return;
 
     // Write the rest of the CUE.
-    memcpy(buf, d->compactUnwind->data.data(), d->compactUnwind->data.size());
+    memcpy(buf + sizeof(Ptr), d->compactUnwind->data.data(),
+           d->compactUnwind->data.size());
     for (const Reloc &r : d->compactUnwind->relocs) {
       uint64_t referentVA = 0;
       if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {
@@ -309,9 +310,8 @@ void UnwindInfoSectionImpl<Ptr>::relocateCompactUnwind(
         }
       } else {
         auto *referentIsec = r.referent.get<InputSection *>();
-        ConcatInputSection *concatIsec = checkTextSegment(referentIsec);
-        if (!concatIsec->shouldOmitFromOutput())
-          referentVA = referentIsec->getVA(r.addend);
+        checkTextSegment(referentIsec);
+        referentVA = referentIsec->getVA(r.addend);
       }
       writeAddress(buf + r.offset, referentVA, r.length);
     }

diff  --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s
index 2fc24052e9f0..5e61e941bffa 100644
--- a/lld/test/MachO/icf.s
+++ b/lld/test/MachO/icf.s
@@ -32,8 +32,9 @@
 # CHECK: [[#%x,CALL_RECURSIVE_2:]]          l     F __TEXT,__text _call_recursive_2
 # CHECK: [[#%x,CHECK_LENGTH_1:]]            l     F __TEXT,__text _check_length_1
 # CHECK: [[#%x,CHECK_LENGTH_2:]]            l     F __TEXT,__text _check_length_2
-# CHECK: [[#%x,HAS_UNWIND_1:]]              l     F __TEXT,__text _has_unwind_1
+# CHECK: [[#%x,HAS_UNWIND_2:]]              l     F __TEXT,__text _has_unwind_1
 # CHECK: [[#%x,HAS_UNWIND_2:]]              l     F __TEXT,__text _has_unwind_2
+# CHECK: [[#%x,HAS_UNWIND_3:]]              l     F __TEXT,__text _has_unwind_3
 # CHECK: [[#%x,MUTALLY_RECURSIVE_2:]]       l     F __TEXT,__text _mutually_recursive_1
 # CHECK: [[#%x,MUTALLY_RECURSIVE_2:]]       l     F __TEXT,__text _mutually_recursive_2
 # CHECK: [[#%x,INIT_2:]]                    l     F __TEXT,__text _init_1
@@ -64,8 +65,9 @@
 # CHECK: callq 0x[[#%x,CALL_RECURSIVE_2:]]          <_call_recursive_2>
 # CHECK: callq 0x[[#%x,CHECK_LENGTH_1:]]            <_check_length_1>
 # CHECK: callq 0x[[#%x,CHECK_LENGTH_2:]]            <_check_length_2>
-# CHECK: callq 0x[[#%x,HAS_UNWIND_1:]]              <_has_unwind_1>
 # CHECK: callq 0x[[#%x,HAS_UNWIND_2:]]              <_has_unwind_2>
+# CHECK: callq 0x[[#%x,HAS_UNWIND_2:]]              <_has_unwind_2>
+# CHECK: callq 0x[[#%x,HAS_UNWIND_3:]]              <_has_unwind_3>
 # CHECK: callq 0x[[#%x,MUTALLY_RECURSIVE_2:]]       <_mutually_recursive_2>
 # CHECK: callq 0x[[#%x,MUTALLY_RECURSIVE_2:]]       <_mutually_recursive_2>
 ## FIXME: Mutually-recursive functions with identical bodies (see below)
@@ -170,8 +172,7 @@ _check_length_2:
 _my_personality:
   mov $1345, %rax
 
-## No fold: functions have unwind info.
-## FIXME: Fold functions with identical unwind info.
+## Functions with identical unwind info should be folded.
 _has_unwind_1:
   .cfi_startproc
   .cfi_personality 155, _my_personality
@@ -186,6 +187,15 @@ _has_unwind_2:
   ret
   .cfi_endproc
 
+## This function has 
diff erent unwind info from the preceding two, and therefore
+## should not be folded.
+_has_unwind_3:
+  .cfi_startproc
+  .cfi_personality 155, _my_personality
+  .cfi_def_cfa_offset 8
+  ret
+  .cfi_endproc
+
 ## Fold: Mutually-recursive functions with symmetric bodies
 _mutually_recursive_1:
   callq _mutually_recursive_1 # call myself
@@ -253,6 +263,7 @@ _main:
   callq _check_length_2
   callq _has_unwind_1
   callq _has_unwind_2
+  callq _has_unwind_3
   callq _mutually_recursive_1
   callq _mutually_recursive_2
   callq _asymmetric_recursive_1


        


More information about the llvm-commits mailing list