[lld] f27e454 - [lld-macho] Implement ICF

Thu Jun 17 10:09:24 PDT 2021

Author: Greg McGary
Date: 2021-06-17T10:07:44-07:00
New Revision: f27e4548fc42876f66dac260ca3b6df0d5fd5fd6

URL: https://github.com/llvm/llvm-project/commit/f27e4548fc42876f66dac260ca3b6df0d5fd5fd6
DIFF: https://github.com/llvm/llvm-project/commit/f27e4548fc42876f66dac260ca3b6df0d5fd5fd6.diff

LOG: [lld-macho] Implement ICF

ICF = Identical C(ode|OMDAT) Folding

This is the LLD ELF/COFF algorithm, adapted for MachO. So far, only `-icf all` is supported. In order to support `-icf safe`, we will need to port address-significance tables (`.addrsig` directives) to MachO, which will come in later diffs.

`check-{llvm,clang,lld}` have 0 regressions for `lld -icf all` vs. baseline ld64.

We only run ICF on `__TEXT,__text` for reasons explained in the block comment in `ConcatOutputSection.cpp`.

Here is the perf impact for linking `chromium_framework` on a Mac Pro (16-core Xeon W) for the non-ICF case vs. pre-ICF:
```
    N           Min           Max        Median           Avg        Stddev
x  20          4.27          4.44          4.34         4.349   0.043029977
+  20          4.37          4.46         4.405        4.4115   0.025188761
Difference at 95.0% confidence
        0.0625 +/- 0.0225658
        1.43711% +/- 0.518873%
        (Student's t, pooled s = 0.0352566)
```

Reviewed By: #lld-macho, int3

Differential Revision: https://reviews.llvm.org/D103292

Added: 
    lld/MachO/ICF.cpp
    lld/MachO/ICF.h
    lld/test/MachO/icf-options.s
    lld/test/MachO/icf-scale.s
    lld/test/MachO/icf.s

Modified: 
    lld/MachO/CMakeLists.txt
    lld/MachO/ConcatOutputSection.cpp
    lld/MachO/ConcatOutputSection.h
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputSection.cpp
    lld/MachO/InputSection.h
    lld/MachO/Options.td
    lld/MachO/Symbols.cpp
    lld/MachO/SyntheticSections.cpp
    lld/MachO/UnwindInfoSection.cpp
    lld/MachO/UnwindInfoSection.h
    lld/MachO/Writer.cpp
    lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd

Removed: 
    


################################################################################
diff  --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 61ac878ffbb7c..eff1812a6bb22 100644

--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -15,6 +15,7 @@ add_lld_library(lldMachO2
   DriverUtils.cpp
   Dwarf.cpp
   ExportTrie.cpp
+  ICF.cpp
   InputFiles.cpp
   InputSection.cpp
   LTO.cpp

diff  --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index 7926503cfee57..232ab5ce39fd4 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -17,8 +17,7 @@
 #include "lld/Common/Memory.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/ScopedPrinter.h"
-
-#include <algorithm>
+#include "llvm/Support/TimeProfiler.h"
 
 using namespace llvm;
 using namespace llvm::MachO;
@@ -357,3 +356,12 @@ void ConcatOutputSection::mergeFlags(InputSection *input) {
   flags |= input->flags;
   flags &= pureMask;
 }
+
+void ConcatOutputSection::eraseOmittedInputSections() {
+  // Compact the sections that remain in the output to the front of `inputs`,
+  // then trim off the tail that held the omitted ones.
+  auto isOmitted = [](const ConcatInputSection *isec) -> bool {
+    return isec->shouldOmitFromOutput();
+  };
+  auto firstOmitted = std::remove_if(inputs.begin(), inputs.end(), isOmitted);
+  inputs.erase(firstOmitted, inputs.end());
+}

diff  --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h
index e26869408dfd3..dad87e3388b2d 100644
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -40,6 +40,7 @@ class ConcatOutputSection final : public OutputSection {
   void finalize() override;
   bool needsThunks() const;
   uint64_t estimateStubsInRangeVA(size_t callIdx) const;
+  void eraseOmittedInputSections();
 
   void writeTo(uint8_t *buf) const override;
 

diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 2f7611406bc03..722b19f712531 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -57,6 +57,13 @@ enum class UndefinedSymbolTreatment {
   dynamic_lookup,
 };
 
+// Level of identical code folding requested via `-icf`.
+enum class ICFLevel {
+  // The `-icf` value was not recognized; the driver warns and falls back to
+  // `none`.
+  unknown,
+  // No folding. This is the default value of Configuration::icfLevel.
+  none,
+  // Accepted by the option parser, but the driver currently warns that it is
+  // not yet implemented and falls back to `none`.
+  safe,
+  // Fold every eligible identical section.
+  all,
+};
+
 struct SectionAlign {
   llvm::StringRef segName;
   llvm::StringRef sectName;
@@ -126,6 +133,7 @@ struct Configuration {
   NamespaceKind namespaceKind = NamespaceKind::twolevel;
   UndefinedSymbolTreatment undefinedSymbolTreatment =
       UndefinedSymbolTreatment::error;
+  ICFLevel icfLevel = ICFLevel::none;
   llvm::MachO::HeaderFileType outputType;
   std::vector<llvm::StringRef> systemLibraryRoots;
   std::vector<llvm::StringRef> librarySearchPaths;

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 8217cc583eac6..d2d271271349f 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -698,6 +698,29 @@ getUndefinedSymbolTreatment(const ArgList &args) {
   return treatment;
 }
 
+// Translate the `-icf` / `-no_deduplicate` command-line options into an
+// ICFLevel. Every unsupported or contradictory request is downgraded to
+// ICFLevel::none after emitting a warning.
+static ICFLevel getICFLevel(const ArgList &args) {
+  const bool noDeduplicate = args.hasArg(OPT_no_deduplicate);
+  StringRef icfLevelStr = args.getLastArgValue(OPT_icf);
+  const ICFLevel requested = StringSwitch<ICFLevel>(icfLevelStr)
+                                 .Cases("none", "", ICFLevel::none)
+                                 .Case("safe", ICFLevel::safe)
+                                 .Case("all", ICFLevel::all)
+                                 .Default(ICFLevel::unknown);
+
+  // Unrecognized value
+  if (requested == ICFLevel::unknown) {
+    warn(Twine("unknown -icf OPTION `") + icfLevelStr +
+         "', defaulting to `none'");
+    return ICFLevel::none;
+  }
+
+  // Nothing left to validate when folding is already off
+  if (requested == ICFLevel::none)
+    return requested;
+
+  // Folding was requested and forbidden at the same time
+  if (noDeduplicate) {
+    warn(Twine("`-icf " + icfLevelStr +
+               "' conflicts with -no_deduplicate, setting to `none'"));
+    return ICFLevel::none;
+  }
+
+  if (requested == ICFLevel::safe) {
+    warn(Twine("`-icf safe' is not yet implemented, reverting to `none'"));
+    return ICFLevel::none;
+  }
+
+  return requested;
+}
+
 static void warnIfDeprecatedOption(const Option &opt) {
   if (!opt.getGroup().isValid())
     return;
@@ -1096,6 +1119,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
 
   config->undefinedSymbolTreatment = getUndefinedSymbolTreatment(args);
 
+  config->icfLevel = getICFLevel(args);
+
   if (config->outputType == MH_EXECUTE)
     config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"),
                                          /*file=*/nullptr,

diff  --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
new file mode 100644
index 0000000000000..fe724799996d9
--- /dev/null
+++ b/lld/MachO/ICF.cpp
@@ -0,0 +1,257 @@
+//===- ICF.cpp ------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ICF.h"
+#include "ConcatOutputSection.h"
+#include "InputSection.h"
+#include "Symbols.h"
+#include "llvm/Support/Parallel.h"
+
+#include <atomic>
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+
+// Take a private copy of INPUTS; the caller's vector is left untouched.
+ICF::ICF(std::vector<ConcatInputSection *> &inputs)
+    : icfInputs(inputs.begin(), inputs.end()) {}
+
+// ICF = Identical Code Folding
+//
+// We only fold __TEXT,__text, so this is really "code" folding, and not
+// "COMDAT" folding. String and scalar constant literals are deduplicated
+// elsewhere.
+//
+// Summary of segments & sections:
+//
+// Since folding never occurs across output-section boundaries,
+// ConcatOutputSection is the natural input for ICF.
+//
+// The __TEXT segment is readonly at the MMU. Some sections are already
+// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
+// synthetic and inherently free of duplicates (__TEXT,__stubs &
+// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
+// could work on __TEXT,__const, but doing so induces many test failures.
+//
+// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
+// thus ineligible for ICF.
+//
+// The __DATA_CONST segment is read/write at the MMU, but is logically const to
+// the application after dyld applies fixups to pointer data. Some sections are
+// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
+// (__DATA_CONST,__got). There are no ICF opportunities here.
+//
+// The __DATA segment is read/write at the MMU, and as application-writeable
+// data, none of its sections are eligible for ICF.
+//
+// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
+// of the segregation algorithm.
+//
+// FIXME(gkm): implement keep-unique attributes
+// FIXME(gkm): implement address-significance tables for MachO object files
+
+// Pass counter. Its parity selects which of the two InputSection::icfEqClass
+// slots is read (icfPass % 2) and which is written ((icfPass + 1) % 2).
+static unsigned icfPass = 0;
+// Set by ICF::segregate() whenever it splits an equivalence class, which
+// tells ICF::run() that another refinement round is needed.
+static std::atomic<bool> icfRepeat{false};
+
+// Compare the parts of two sections that cannot change between ICF rounds:
+// raw bytes, flags, and every relocation field other than the referent
+// itself (only the referent's kind, symbol vs. section, is compared).
+static bool equalsConstant(const ConcatInputSection *ia,
+                           const ConcatInputSection *ib) {
+  if (ia->data.size() != ib->data.size() || ia->data != ib->data)
+    return false;
+  if (ia->flags != ib->flags || ia->relocs.size() != ib->relocs.size())
+    return false;
+  auto sameShape = [](const Reloc &ra, const Reloc &rb) {
+    return ra.type == rb.type && ra.pcrel == rb.pcrel &&
+           ra.length == rb.length && ra.offset == rb.offset &&
+           ra.addend == rb.addend &&
+           ra.referent.is<Symbol *>() == rb.referent.is<Symbol *>();
+  };
+  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
+                    sameShape);
+}
+
+// Compare only the relocation referents.
+//
+// This is expected to be called on sections that already compared equal
+// under equalsConstant(), so the relocation counts match and corresponding
+// referents are of the same kind (both symbols or both sections). Referents
+// are considered equal when they are identical, or when they land in the same
+// equivalence class of the current pass (slot icfPass % 2).
+static bool equalsVariable(const ConcatInputSection *ia,
+                           const ConcatInputSection *ib) {
+  assert(ia->relocs.size() == ib->relocs.size());
+  auto f = [&](const Reloc &ra, const Reloc &rb) {
+    if (ra.referent == rb.referent)
+      return true;
+    if (ra.referent.is<Symbol *>()) {
+      const auto *sa = ra.referent.get<Symbol *>();
+      const auto *sb = rb.referent.get<Symbol *>();
+      if (sa->kind() != sb->kind())
+        return false;
+      if (isa<Defined>(sa)) {
+        // Both kinds match, so both symbols are Defined here.
+        const auto *da = cast<Defined>(sa);
+        const auto *db = cast<Defined>(sb);
+        if (da->value != db->value)
+          return false;
+        // Compare the two symbols with each other. An absolute symbol must
+        // never be treated as equivalent to a section-relative one.
+        if (da->isAbsolute() != db->isAbsolute())
+          return false;
+        // NOTE(review): presumably isAbsolute() means `isec` is null, so once
+        // the check above passes db->isec is non-null whenever da->isec is.
+        // Confirm against the definition of Defined.
+        if (da->isec)
+          if (da->isec->icfEqClass[icfPass % 2] !=
+              db->isec->icfEqClass[icfPass % 2])
+            return false;
+      } else if (isa<DylibSymbol>(sa)) {
+        // There is one DylibSymbol per gotIndex and we already checked for
+        // symbol equality, thus we know that these must be different.
+        return false;
+      } else {
+        llvm_unreachable("equalsVariable symbol kind");
+      }
+    } else {
+      const auto *sa = ra.referent.get<InputSection *>();
+      const auto *sb = rb.referent.get<InputSection *>();
+      if (sa->icfEqClass[icfPass % 2] != sb->icfEqClass[icfPass % 2])
+        return false;
+    }
+    return true;
+  };
+  return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
+                    f);
+}
+
+// Return the index of the first InputSection after BEGIN whose equivalence
+// class differs from that of icfInputs[BEGIN], or END when there is none.
+size_t ICF::findBoundary(size_t begin, size_t end) {
+  const uint64_t leaderClass = icfInputs[begin]->icfEqClass[icfPass % 2];
+  size_t idx = begin + 1;
+  while (idx < end && icfInputs[idx]->icfEqClass[icfPass % 2] == leaderClass)
+    ++idx;
+  return idx < end ? idx : end;
+}
+
+// Walk [BEGIN, END) one equivalence class at a time, calling FUNC with the
+// half-open index range of each run of sections sharing a class
+void ICF::forEachClassRange(size_t begin, size_t end,
+                            std::function<void(size_t, size_t)> func) {
+  for (size_t classBegin = begin; classBegin < end;) {
+    const size_t classEnd = findBoundary(classBegin, end);
+    func(classBegin, classEnd);
+    classBegin = classEnd;
+  }
+}
+
+// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
+// with matching equivalence class.
+//
+// Every call advances icfPass by one on return, which swaps the roles of the
+// two icfEqClass slots (the slot written during this call becomes the slot
+// read by the next one).
+void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
+  // Only use threads when the benefits outweigh the overhead.
+  const size_t threadingThreshold = 1024;
+  if (icfInputs.size() < threadingThreshold) {
+    forEachClassRange(0, icfInputs.size(), func);
+    ++icfPass;
+    return;
+  }
+
+  // Shard into non-overlapping intervals, and call FUNC in parallel.  The
+  // sharding must be completed before any calls to FUNC are made so that FUNC
+  // can modify the InputSection in its shard without causing data races.
+  const size_t shards = 256;
+  size_t step = icfInputs.size() / shards;
+  size_t boundaries[shards + 1];
+  boundaries[0] = 0;
+  boundaries[shards] = icfInputs.size();
+  // Interior boundaries are snapped to the next equivalence-class boundary so
+  // that no class straddles two shards.
+  // NOTE(review): assumes parallelForEachN iterates the half-open range
+  // [1, shards); boundaries[shards] is filled in explicitly above.
+  parallelForEachN(1, shards, [&](size_t i) {
+    boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
+  });
+  parallelForEachN(1, shards + 1, [&](size_t i) {
+    // Snapping can leave a shard empty; skip those.
+    if (boundaries[i - 1] < boundaries[i]) {
+      forEachClassRange(boundaries[i - 1], boundaries[i], func);
+    }
+  });
+  ++icfPass;
+}
+
+// Run the whole folding pipeline over icfInputs: mix referent hashes into
+// each section's hash, sort by hash, split classes on constant content, split
+// again on relocation referents until nothing changes, and finally fold each
+// surviving class into its first member.
+void ICF::run() {
+  // Into each origin-section hash, combine all reloc referent section hashes.
+  // Two rounds are run: round 0 reads slot 0 and writes slot 1, round 1 reads
+  // slot 1 and writes slot 0. The loop leaves icfPass == 2, so slot 0 is the
+  // "current" slot for the code that follows.
+  for (icfPass = 0; icfPass < 2; ++icfPass) {
+    parallelForEach(icfInputs, [&](InputSection *isec) {
+      uint64_t hash = isec->icfEqClass[icfPass % 2];
+      for (const Reloc &r : isec->relocs) {
+        // Only symbol referents contribute; section referents are not mixed
+        // into the hash here.
+        if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
+          if (auto *dylibSym = dyn_cast<DylibSymbol>(sym))
+            hash += dylibSym->stubsHelperIndex;
+          else if (auto *defined = dyn_cast<Defined>(sym))
+            hash +=
+                defined->value +
+                (defined->isec ? defined->isec->icfEqClass[icfPass % 2] : 0);
+          else
+            llvm_unreachable("foldIdenticalSections symbol kind");
+        }
+      }
+      // Set MSB to 1 to avoid collisions with non-hashed classes.
+      isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63);
+    });
+  }
+
+  // Bring sections with equal hashes next to each other so that each
+  // candidate class is a contiguous index range.
+  llvm::stable_sort(icfInputs,
+                    [](const InputSection *a, const InputSection *b) {
+                      return a->icfEqClass[0] < b->icfEqClass[0];
+                    });
+  forEachClass(
+      [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); });
+
+  // Split equivalence groups by comparing relocations until convergence
+  do {
+    icfRepeat = false;
+    forEachClass([&](size_t begin, size_t end) {
+      segregate(begin, end, equalsVariable);
+    });
+  } while (icfRepeat);
+  // The reported count is the raw icfPass value, which also includes the two
+  // hashing rounds above and the equalsConstant pass.
+  log("ICF needed " + Twine(icfPass) + " iterations");
+
+  // Fold sections within equivalence classes
+  forEachClass([&](size_t begin, size_t end) {
+    // A singleton class has nothing to fold.
+    if (end - begin < 2)
+      return;
+    // The first member of the class survives; the rest are folded into it.
+    ConcatInputSection *beginIsec = icfInputs[begin];
+    for (size_t i = begin + 1; i < end; ++i)
+      beginIsec->foldIdentical(icfInputs[i]);
+  });
+}
+
+// Refine the equivalence class occupying [BEGIN, END) into smaller classes
+// whose members all satisfy EQUALS against the first member of their group.
+void ICF::segregate(
+    size_t begin, size_t end,
+    std::function<bool(const ConcatInputSection *, const ConcatInputSection *)>
+        equals) {
+  for (size_t groupBegin = begin; groupBegin < end;) {
+    // The first element is the group's representative. Gather everything
+    // equal to it right behind it, preserving relative order.
+    ConcatInputSection *leader = icfInputs[groupBegin];
+    auto firstMismatch = std::stable_partition(
+        icfInputs.begin() + groupBegin + 1, icfInputs.begin() + end,
+        [&](ConcatInputSection *candidate) {
+          return equals(leader, candidate);
+        });
+    const size_t groupEnd = firstMismatch - icfInputs.begin();
+
+    // The group's one-past-the-end index doubles as its class ID, since no
+    // two groups end at the same index. It goes into the slot that the next
+    // pass will read.
+    for (size_t i = groupBegin; i < groupEnd; ++i)
+      icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = groupEnd;
+
+    // Leftover elements mean the class was split, so the caller has to run
+    // another round.
+    if (groupEnd != end)
+      icfRepeat = true;
+
+    groupBegin = groupEnd;
+  }
+}

diff  --git a/lld/MachO/ICF.h b/lld/MachO/ICF.h
new file mode 100644
index 0000000000000..767630f0d7eb1
--- /dev/null
+++ b/lld/MachO/ICF.h
@@ -0,0 +1,42 @@
+//===- ICF.h ----------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_ICF_H
+#define LLD_MACHO_ICF_H
+
+#include "lld/Common/LLVM.h"
+#include <vector>
+
+namespace lld {
+namespace macho {
+
+class ConcatInputSection;
+
+// Driver for identical code folding over one list of ConcatInputSections.
+// NOTE(review): the declarations below use std::function, but <functional>
+// is not among the includes visible in this header; confirm it arrives
+// transitively.
+class ICF {
+public:
+  // Copies INPUTS into icfInputs.
+  ICF(std::vector<ConcatInputSection *> &inputs);
+
+  // Runs the whole folding algorithm over icfInputs.
+  void run();
+  // Splits the equivalence class occupying [begin, end) of icfInputs into
+  // smaller classes according to EQUALS.
+  void segregate(size_t begin, size_t end,
+                 std::function<bool(const ConcatInputSection *,
+                                    const ConcatInputSection *)>
+                     equals);
+  // Returns the index of the first section after BEGIN whose equivalence
+  // class differs from that of icfInputs[begin], or END if there is none.
+  size_t findBoundary(size_t begin, size_t end);
+  // Calls FUNC once per run of same-class sections within [begin, end).
+  void forEachClassRange(size_t begin, size_t end,
+                         std::function<void(size_t, size_t)> func);
+  // Calls FUNC once per equivalence class across all of icfInputs.
+  void forEachClass(std::function<void(size_t, size_t)> func);
+
+  // ICF needs a copy of the inputs vector because its equivalence-class
+  // segregation algorithm destroys the proper sequence.
+  std::vector<ConcatInputSection *> icfInputs;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 2fc08ff911c2f..d8d6734c737ff 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -642,10 +642,16 @@ void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
       auto *concatIsec = cast<ConcatInputSection>(isec);
 
       auto *nextIsec = make<ConcatInputSection>(*concatIsec);
-      nextIsec->data = isec->data.slice(symbolOffset);
       nextIsec->numRefs = 0;
       nextIsec->wasCoalesced = false;
-      isec->data = isec->data.slice(0, symbolOffset);
+      if (isZeroFill(isec->flags)) {
+        // Zero-fill sections have NULL data.data() but non-zero data.size()
+        nextIsec->data = {nullptr, isec->data.size() - symbolOffset};
+        isec->data = {nullptr, symbolOffset};
+      } else {
+        nextIsec->data = isec->data.slice(symbolOffset);
+        isec->data = isec->data.slice(0, symbolOffset);
+      }
 
       // By construction, the symbol will be at offset zero in the new
       // subsection.

diff  --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index b56e496f15670..c656efc380249 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -12,6 +12,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
+#include "UnwindInfoSection.h"
 #include "Writer.h"
 #include "lld/Common/Memory.h"
 #include "llvm/Support/Endian.h"
@@ -44,6 +45,67 @@ static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {
   return sym->getVA();
 }
 
+// ICF needs to hash any section that might potentially be duplicated so
+// that it can match on content rather than identity.
+//
+// Returns true if this section's content should be hashed for ICF, false if
+// the caller should treat it as unique. ISTEXT tells whether the section
+// belongs to the output section that ICF is folding.
+bool InputSection::isHashableForICF(bool isText) const {
+  // Sections that are already dropped from the output are never hashed.
+  if (auto const *concatIsec = dyn_cast<ConcatInputSection>(this))
+    if (concatIsec->shouldOmitFromOutput())
+      return false;
+  switch (sectionType(flags)) {
+  case S_REGULAR:
+    // Regular text is hashable unless the section is marked hasPersonality.
+    if (isText)
+      return !hasPersonality;
+    // One might hope that we could hash __TEXT,__const subsections to fold
+    // references to duplicated values, but alas, many tests fail.
+    return false;
+  case S_CSTRING_LITERALS:
+  case S_4BYTE_LITERALS:
+  case S_8BYTE_LITERALS:
+  case S_16BYTE_LITERALS:
+  case S_LITERAL_POINTERS:
+    // FIXME(gkm): once literal sections are deduplicated, their content and
+    // identity correlate, so we can assign unique IDs to them rather than hash
+    // them.
+    return true;
+  // Every remaining known section type is treated as unique.
+  case S_ZEROFILL:
+  case S_GB_ZEROFILL:
+  case S_NON_LAZY_SYMBOL_POINTERS:
+  case S_LAZY_SYMBOL_POINTERS:
+  case S_SYMBOL_STUBS:
+  case S_MOD_INIT_FUNC_POINTERS:
+  case S_MOD_TERM_FUNC_POINTERS:
+  case S_COALESCED:
+  case S_INTERPOSING:
+  case S_DTRACE_DOF:
+  case S_LAZY_DYLIB_SYMBOL_POINTERS:
+  case S_THREAD_LOCAL_REGULAR:
+  case S_THREAD_LOCAL_ZEROFILL:
+  case S_THREAD_LOCAL_VARIABLES:
+  case S_THREAD_LOCAL_VARIABLE_POINTERS:
+  case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS:
+    return false;
+  default:
+    llvm_unreachable("Section type");
+  }
+}
+
+// Seed icfEqClass[0] with a hash of this section's contents.
+void InputSection::hashForICF() {
+  // Zero-fill sections carry a null data pointer with a non-zero size and
+  // must never be hashed
+  assert(data.data());
+  // A non-zero slot already holds a unique ID, which must not be overwritten
+  assert(icfEqClass[0] == 0);
+  // Force the top bit on so that a valid hash can never collide with the
+  // small-integer unique IDs given to ICF-ineligible sections
+  const uint64_t hashedMarker = 1ull << 63;
+  icfEqClass[0] = xxHash64(data) | hashedMarker;
+}
+
+// Fold COPY into this section: this section survives and COPY is retired.
+void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
+  // The survivor takes the larger of the two alignments
+  if (align < copy->align)
+    align = copy->align;
+
+  // Move the duplicate's reference count over to the survivor
+  numRefs += copy->numRefs;
+  copy->numRefs = 0;
+
+  // Retire the duplicate and record which section replaces it
+  copy->replacement = this;
+  copy->wasCoalesced = true;
+  copy->live = false;
+}
+
 void ConcatInputSection::writeTo(uint8_t *buf) {
   assert(!shouldOmitFromOutput());
 

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index e0d463ea30bc0..d713b6bbe0d78 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -23,6 +23,7 @@ namespace macho {
 
 class InputFile;
 class OutputSection;
+class Defined;
 
 class InputSection {
 public:
@@ -54,8 +55,20 @@ class InputSection {
   uint32_t align = 1;
   uint32_t flags = 0;
   uint32_t callSiteCount = 0;
-  bool isFinal = false; // is address assigned?
 
+  // is address assigned?
+  bool isFinal = false;
+
+  bool isHashableForICF(bool isText) const;
+  void hashForICF();
+  InputSection *canonical() { return replacement ? replacement : this; }
+
+  // ICF can't fold functions with LSDA+personality
+  bool hasPersonality = false;
+  // Points to the surviving section after this one is folded by ICF
+  InputSection *replacement = nullptr;
+  // Equivalence-class ID for ICF
+  uint64_t icfEqClass[2] = {0, 0};
 
   ArrayRef<uint8_t> data;
   std::vector<Reloc> relocs;
@@ -98,6 +111,8 @@ class ConcatInputSection final : public InputSection {
     return isec->kind() == ConcatKind;
   }
 
+  void foldIdentical(ConcatInputSection *redundant);
+
   // With subsections_via_symbols, most symbols have their own InputSection,
   // and for weak symbols (e.g. from inline functions), only the
   // InputSection from one translation unit will make it to the output,

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 481d7aa6cb25d..9f9b8d301528a 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -275,6 +275,13 @@ def no_branch_islands : Flag<["-"], "no_branch_islands">,
     HelpText<"Disable infra for branches beyond the maximum branch distance.">,
     Flags<[HelpHidden]>,
     Group<grp_opts>;
+// Identical-code-folding level. The value is passed as a separate argument,
+// e.g. `-icf all'.
+def icf: Separate<["-"], "icf">,
+    HelpText<"Set level for identical code folding (default: none)">,
+    MetaVarName<"[none,safe,all]">,
+    Group<grp_opts>;
+// Turns identical code folding off. The driver warns and disables folding
+// when this flag is combined with a non-`none' -icf level.
+def no_deduplicate : Flag<["-"], "no_deduplicate">,
+    HelpText<"Disable code deduplication (synonym for `-icf none')">,
+    Group<grp_opts>;
 
 def grp_version : OptionGroup<"version">, HelpText<"VERSION TARGETING">;
 
@@ -601,10 +608,6 @@ def no_weak_imports : Flag<["-"], "no_weak_imports">,
     HelpText<"Fail if any symbols are weak imports, allowed to be NULL at runtime">,
     Flags<[HelpHidden]>,
     Group<grp_rare>;
-def no_deduplicate : Flag<["-"], "no_deduplicate">,
-    HelpText<"Omit the deduplication pass">,
-    Flags<[HelpHidden]>,
-    Group<grp_rare>;
 def verbose_deduplicate : Flag<["-"], "verbose_deduplicate">,
     HelpText<"Print function names eliminated by deduplication and the total size of code savings">,
     Flags<[HelpHidden]>,

diff  --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp
index 26a4a873677b8..47f30d4141fce 100644
--- a/lld/MachO/Symbols.cpp
+++ b/lld/MachO/Symbols.cpp
@@ -40,7 +40,9 @@ bool Symbol::isLive() const {
     // no_dead_strip or live_support. In that case, the section will know
     // that it's live but `used` might be false. Non-absolute symbols always
     // have to use the section's `live` bit as source of truth.
-    return d->isAbsolute() ? used : d->isec->isLive(d->value);
+    if (d->isAbsolute())
+      return used;
+    return d->isec->canonical()->isLive(d->value);
   }
 
   assert(!isa<CommonSymbol>(this) &&
@@ -57,7 +59,7 @@ uint64_t Defined::getVA() const {
   if (isAbsolute())
     return value;
 
-  if (!isec->isFinal) {
+  if (!isec->canonical()->isFinal) {
     // A target arch that does not use thunks ought never ask for
     // the address of a function that has not yet been finalized.
     assert(target->usesThunks());
@@ -68,7 +70,7 @@ uint64_t Defined::getVA() const {
     // expedient to return a contrived out-of-range address.
     return TargetInfo::outOfRangeVA;
   }
-  return isec->getVA(value);
+  return isec->canonical()->getVA(value);
 }
 
 uint64_t DylibSymbol::getVA() const {

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index ef84a9d7c0e8d..fec43002bb461 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -647,6 +647,9 @@ void FunctionStartsSection::finalizeContents() {
     if (const auto *defined = dyn_cast<Defined>(sym)) {
       if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive())
         continue;
+      if (const auto *concatIsec = dyn_cast<ConcatInputSection>(defined->isec))
+        if (concatIsec->shouldOmitFromOutput())
+          continue;
       // TODO: Add support for thumbs, in that case
       // the lowest bit of nextAddr needs to be set to 1.
       addrs.push_back(defined->getVA());

diff  --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 309e5ce64d73c..deeef18f0bb36 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -143,12 +143,18 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
   // work. But since there are usually just few personality functions
   // that are referenced from many places, at least some of them likely
   // live, it wouldn't reduce number of got entries.
-  for (Reloc &r : isec->relocs) {
+  for (size_t i = 0; i < isec->relocs.size(); ++i) {
+    Reloc &r = isec->relocs[i];
     assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
     if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
         offsetof(CompactUnwindEntry<Ptr>, personality))
       continue;
 
+    Reloc &rFunc = isec->relocs[++i];
+    assert(r.offset ==
+           rFunc.offset + offsetof(CompactUnwindEntry<Ptr>, personality));
+    rFunc.referent.get<InputSection *>()->hasPersonality = true;
+
     if (auto *s = r.referent.dyn_cast<Symbol *>()) {
       if (auto *undefined = dyn_cast<Undefined>(s)) {
         treatUndefinedSymbol(*undefined);

diff  --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index 4f181099a5831..7ccf7a4dfde7e 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -13,9 +13,6 @@
 #include "SyntheticSections.h"
 
 #include "mach-o/compact_unwind_encoding.h"
-#include "llvm/ADT/DenseMap.h"
-
-#include <vector>
 
 namespace lld {
 namespace macho {

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 523f5013e22fc..8df5ac1494fd3 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -9,6 +9,7 @@
 #include "Writer.h"
 #include "ConcatOutputSection.h"
 #include "Config.h"
+#include "ICF.h"
 #include "InputFiles.h"
 #include "InputSection.h"
 #include "MapFile.h"
@@ -51,6 +52,7 @@ class Writer {
   void scanSymbols();
   template <class LP> void createOutputSections();
   template <class LP> void createLoadCommands();
+  void foldIdenticalSections();
   void finalizeAddresses();
   void finalizeLinkEditSegment();
   void assignAddresses(OutputSegment *);
@@ -76,6 +78,7 @@ class Writer {
 
   LCUuid *uuidCommand = nullptr;
   OutputSegment *linkEditSegment = nullptr;
+  DenseMap<NamePair, ConcatOutputSection *> concatOutputSections;
 };
 
 // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -885,7 +888,6 @@ template <class LP> void Writer::createOutputSections() {
   }
 
   // Then add input sections to output sections.
-  DenseMap<NamePair, ConcatOutputSection *> concatOutputSections;
   for (const auto &p : enumerate(inputSections)) {
     InputSection *isec = p.value();
     OutputSection *osec;
@@ -940,6 +942,47 @@ template <class LP> void Writer::createOutputSections() {
   linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
 }
 
+// Run ICF over the (possibly renamed) __TEXT,__text output section, then
+// drop the folded-away input sections from it.
+void Writer::foldIdenticalSections() {
+  if (config->icfLevel == ICFLevel::none)
+    return;
+  ConcatOutputSection *textOsec = concatOutputSections.lookup(
+      maybeRenameSection({segment_names::text, section_names::text}));
+  if (!textOsec)
+    return;
+
+  TimeTraceScope timeScope("Fold Identical Code Sections");
+  // The segregation algorithm relies on hashes of InputSection::data having
+  // been computed up front, both for the output section's inputs and for
+  // every section their relocs refer to. Chasing relocs recursively to find
+  // the referenced sections would be hard to parallelize, so we simply visit
+  // all input sections here, where they are available as one flat vector.
+  std::vector<InputSection *> toHash;
+  // A section that is ineligible for ICF gets a unique ID instead of a hash,
+  // which forces it into an unfoldable singleton equivalence class. The
+  // unique IDs start above inputSections.size(), so they never intersect
+  // with equivalence-class IDs, which begin at 0. Hashes and unique IDs
+  // never coexist with equivalence-class IDs, so this is not strictly
+  // necessary, but it might help keep the numbers straight when debugging
+  // ICF::segregate().
+  uint64_t nextUniqueID = inputSections.size();
+  for (InputSection *isec : inputSections) {
+    const bool isText = isec->parent == textOsec;
+    if (!isec->isHashableForICF(isText)) {
+      isec->icfEqClass[0] = ++nextUniqueID;
+      continue;
+    }
+    toHash.push_back(isec);
+  }
+  parallelForEach(toHash, [](InputSection *isec) { isec->hashForICF(); });
+
+  // Every input section is now either hashed or marked unique, so the
+  // segregation algorithm can look for foldable subsections.
+  ICF(textOsec->inputs).run();
+
+  const size_t oldSize = textOsec->inputs.size();
+  textOsec->eraseOmittedInputSections();
+  const size_t newSize = textOsec->inputs.size();
+  log("ICF kept " + Twine(newSize) + " removed " + Twine(oldSize - newSize) +
+      " of " + Twine(oldSize));
+}
+
 void Writer::finalizeAddresses() {
   TimeTraceScope timeScope("Finalize addresses");
   uint64_t pageSize = target->getPageSize();
@@ -1071,6 +1114,7 @@ template <class LP> void Writer::run() {
     in.stubHelper->setup();
   scanSymbols();
   createOutputSections<LP>();
+  foldIdenticalSections();
   // After this point, we create no new segments; HOWEVER, we might
   // yet create branch-range extension thunks for architectures whose
   // hardware call instructions have limited range, e.g., ARM(64).

diff  --git a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
index 716905997a912..f27ca44b0f861 100644
--- a/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
+++ b/lld/test/MachO/Inputs/MacOSX.sdk/usr/lib/libSystem.tbd
@@ -68,5 +68,5 @@ parent-umbrella:
     umbrella:     System
 exports:
   - targets:      [ x86_64-macos, x86_64-maccatalyst, arm64-macos ]
-    symbols:      [ ___nan ]
+    symbols:      [ ___nan, ___isnan, ___inf, ___isinf ]
 ...

diff --git a/lld/test/MachO/icf-options.s b/lld/test/MachO/icf-options.s
new file mode 100644
index 0000000000000..4aca312ba81d8
--- /dev/null
+++ b/lld/test/MachO/icf-options.s
@@ -0,0 +1,65 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; mkdir %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+# RUN: %lld -lSystem -icf all -o %t/all %t/main.o 2>&1 \
+# RUN:     | FileCheck %s --check-prefix=DIAG-EMPTY --allow-empty
+# RUN: %lld -lSystem -icf none -o %t/none %t/main.o 2>&1 \
+# RUN:     | FileCheck %s --check-prefix=DIAG-EMPTY --allow-empty
+# RUN: %lld -lSystem -no_deduplicate -o %t/no_dedup %t/main.o 2>&1 \
+# RUN:     | FileCheck %s --check-prefix=DIAG-EMPTY --allow-empty
+# RUN: not %lld -lSystem -icf safe -o %t/safe %t/main.o 2>&1 \
+# RUN:     | FileCheck %s --check-prefix=DIAG-SAFE
+# RUN: not %lld -lSystem -icf junk -o %t/junk %t/main.o 2>&1 \
+# RUN:     | FileCheck %s --check-prefix=DIAG-JUNK
+# RUN: not %lld -lSystem -icf all -no_deduplicate -o %t/clash %t/main.o 2>&1 \
+# RUN:     | FileCheck %s --check-prefix=DIAG-CLASH
+
+# DIAG-EMPTY-NOT: {{.}}
+# DIAG-SAFE: `-icf safe' is not yet implemented, reverting to `none'
+# DIAG-JUNK: unknown -icf OPTION `junk', defaulting to `none'
+# DIAG-CLASH: `-icf all' conflicts with -no_deduplicate, setting to `none'
+
+# RUN: llvm-objdump -d --syms %t/all | FileCheck %s --check-prefix=FOLD
+# RUN: llvm-objdump -d --syms %t/none | FileCheck %s --check-prefix=NOOP
+# RUN: llvm-objdump -d --syms %t/no_dedup | FileCheck %s --check-prefix=NOOP
+
+# FOLD-LABEL: SYMBOL TABLE:
+# FOLD:       [[#%x,MAIN:]] g   F __TEXT,__text _main
+# FOLD:       [[#%x,F:]]    g   F __TEXT,__text _f1
+# FOLD:       [[#%x,F]]     g   F __TEXT,__text _f2
+
+# FOLD-LABEL: Disassembly of section __TEXT,__text:
+# FOLD:       [[#%x,MAIN]] <_main>:
+# FOLD-NEXT:  callq 0x[[#%x,F]]  <_f2>
+# FOLD-NEXT:  callq 0x[[#%x,F]]  <_f2>
+
+# NOOP-LABEL: SYMBOL TABLE:
+# NOOP:       [[#%x,MAIN:]] g   F __TEXT,__text _main
+# NOOP:       [[#%x,F1:]]   g   F __TEXT,__text _f1
+# NOOP:       [[#%x,F2:]]   g   F __TEXT,__text _f2
+
+# NOOP-LABEL: Disassembly of section __TEXT,__text:
+# NOOP:       [[#%x,MAIN]] <_main>:
+# NOOP-NEXT:  callq 0x[[#%x,F1]]  <_f1>
+# NOOP-NEXT:  callq 0x[[#%x,F2]]  <_f2>
+
+.subsections_via_symbols
+.text
+.p2align 2
+
+.globl _f1
+_f1:
+  movl $0, %eax
+  ret
+
+.globl _f2
+_f2:
+  movl $0, %eax
+  ret
+
+.globl _main
+_main:
+  callq _f1
+  callq _f2
+  ret

diff --git a/lld/test/MachO/icf-scale.s b/lld/test/MachO/icf-scale.s
new file mode 100644
index 0000000000000..211ea945f4ca0
--- /dev/null
+++ b/lld/test/MachO/icf-scale.s
@@ -0,0 +1,81 @@
+# REQUIRES: x86
+# RUN: rm -rf %t*
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: %lld -lSystem -icf all -o %t %t.o
+# RUN: llvm-objdump -d --syms %t | FileCheck %s
+
+## When ICF has fewer than 1 Ki functions to segregate into equivalence classes,
+## it uses a sequential algorithm to avoid the overhead of threading.
+## At 1 Ki functions or more, when threading begins to pay-off, ICF employs its
+## parallel segregation algorithm. Here we generate 4 Ki functions to exercise
+## the parallel algorithm. There are 4 unique function bodies, each replicated
+## 1 Ki times. The resulting folded program should retain one instance for each
+## of the four unique functions.
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK: [[#%x,G0:]] g   F __TEXT,__text _g000000
+# CHECK: [[#%x,G1:]] g   F __TEXT,__text _g100000
+# CHECK: [[#%x,G2:]] g   F __TEXT,__text _g200000
+# CHECK: [[#%x,G3:]] g   F __TEXT,__text _g300000
+## . . . many intervening _gXXXXXX symbols
+# CHECK: [[#%x,G0]]  g   F __TEXT,__text _g033333
+# CHECK: [[#%x,G1]]  g   F __TEXT,__text _g133333
+# CHECK: [[#%x,G2]]  g   F __TEXT,__text _g233333
+# CHECK: [[#%x,G3]]  g   F __TEXT,__text _g333333
+
+# CHECK-LABEL: Disassembly of section __TEXT,__text:
+# CHECK-DAG: [[#%x,G0]]  <_g033333>:
+# CHECK-DAG: [[#%x,G1]]  <_g133333>:
+# CHECK-DAG: [[#%x,G2]]  <_g233333>:
+# CHECK-DAG: [[#%x,G3]]  <_g333333>:
+# CHECK-NOT: [[#]]       <_g{{.*}}>:
+
+.subsections_via_symbols
+.text
+.p2align 2
+
+.macro gen_4 c
+  .globl _g0\c, _g1\c, _g2\c, _g3\c
+  _g0\c:; movl $0, %eax; ret
+  _g1\c:; movl $1, %eax; ret
+  _g2\c:; movl $2, %eax; ret
+  _g3\c:; movl $3, %eax; ret
+.endm
+
+.macro gen_16 c
+  gen_4 0\c
+  gen_4 1\c
+  gen_4 2\c
+  gen_4 3\c
+.endm
+
+.macro gen_64 c
+  gen_16 0\c
+  gen_16 1\c
+  gen_16 2\c
+  gen_16 3\c
+.endm
+
+.macro gen_256 c
+  gen_64 0\c
+  gen_64 1\c
+  gen_64 2\c
+  gen_64 3\c
+.endm
+
+.macro gen_1024 c
+  gen_256 0\c
+  gen_256 1\c
+  gen_256 2\c
+  gen_256 3\c
+.endm
+
+gen_1024 0
+gen_1024 1
+gen_1024 2
+gen_1024 3
+
+.globl _main
+_main:
+  ret

diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s
new file mode 100644
index 0000000000000..3fbfcaebffd46
--- /dev/null
+++ b/lld/test/MachO/icf.s
@@ -0,0 +1,205 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; mkdir %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
+# RUN: %lld -lSystem -icf all -o %t/main %t/main.o
+# RUN: llvm-objdump -d --syms %t/main | FileCheck %s
+
+# CHECK-LABEL: SYMBOL TABLE:
+# CHECK:       [[#%x,MAIN:]] g   F __TEXT,__text _main
+# CHECK:       [[#%x,A:]]    g   F __TEXT,__text _a1
+# CHECK:       [[#%x,A]]     g   F __TEXT,__text _a2
+# CHECK:       [[#%x,C:]]    g   F __TEXT,__text _c
+# CHECK:       [[#%x,D:]]    g   F __TEXT,__text _d
+# CHECK:       [[#%x,E:]]    g   F __TEXT,__text _e
+# CHECK:       [[#%x,F:]]    g   F __TEXT,__text _f
+# CHECK:       [[#%x,G:]]    g   F __TEXT,__text _g
+# CHECK:       [[#%x,SR:]]   g   F __TEXT,__text _sr1
+# CHECK:       [[#%x,SR]]    g   F __TEXT,__text _sr2
+# CHECK:       [[#%x,MR:]]   g   F __TEXT,__text _mr1
+# CHECK:       [[#%x,MR]]    g   F __TEXT,__text _mr2
+### FIXME: Mutually-recursive functions with identical bodies (see below)
+# COM:         [[#%x,XR:]]   g   F __TEXT,__text _xr1
+# COM:         [[#%x,XR]]    g   F __TEXT,__text _xr2
+
+# CHECK-LABEL: Disassembly of section __TEXT,__text:
+# CHECK:       [[#%x,MAIN]] <_main>:
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a2>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a2>
+# CHECK-NEXT:  callq 0x[[#%x,C]]  <_c>
+# CHECK-NEXT:  callq 0x[[#%x,D]]  <_d>
+# CHECK-NEXT:  callq 0x[[#%x,E]]  <_e>
+# CHECK-NEXT:  callq 0x[[#%x,F]]  <_f>
+# CHECK-NEXT:  callq 0x[[#%x,G]]  <_g>
+# CHECK-NEXT:  callq 0x[[#%x,SR]] <_sr2>
+# CHECK-NEXT:  callq 0x[[#%x,SR]] <_sr2>
+# CHECK-NEXT:  callq 0x[[#%x,MR]] <_mr2>
+# CHECK-NEXT:  callq 0x[[#%x,MR]] <_mr2>
+### FIXME: Mutually-recursive functions with identical bodies (see below)
+# COM-NEXT:    callq 0x[[#%x,XR]] <_xr2>
+# COM-NEXT:    callq 0x[[#%x,XR]] <_xr2>
+
+### TODO:
+### * Fold: funcs only differ in alignment
+### * No fold: func has personality/LSDA
+### * No fold: reloc references to absolute symbols with different values
+### * No fold: func is weak? preemptable?
+### * No fold: relocs to N_ALT_ENTRY symbols
+
+.subsections_via_symbols
+.text
+
+### Fold: _a1 & _a2 have identical bodies, flags, relocs
+
+.globl _a1
+.p2align 4, 0x90
+_a1:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movl $0, %eax
+  ret
+
+.globl _a2
+.p2align 4, 0x90
+_a2:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movl $0, %eax
+  ret
+
+### No fold: _c has slightly different body from _a1 & _a2
+
+.globl _c
+.p2align 4, 0x90
+_c:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movl $1, %eax
+  ret
+
+### No fold: _d has the same body as _a1 & _a2, but _d is recursive!
+
+.globl _d
+.p2align 4, 0x90
+_d:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movl $0, %eax
+  ret
+
+### No fold: the body of _e is longer
+
+.globl _e
+.p2align 4, 0x90
+_e:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movl $0, %eax
+  ret
+  nop
+
+### No fold: the dylib symbols differ
+
+.globl _f
+.p2align 4, 0x90
+_f:
+  callq _d
+  mov ___inf@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movl $0, %eax
+  ret
+
+.globl _g
+.p2align 4, 0x90
+_g:
+  callq _d
+  mov ___inf@GOTPCREL(%rip), %rax
+  callq ___isinf
+  movl $0, %eax
+  ret
+
+### Fold: Simple recursion
+
+.globl _sr1
+.p2align 4, 0x90
+_sr1:
+  callq _sr1
+  movl $2, %eax
+  ret
+
+.globl _sr2
+.p2align 4, 0x90
+_sr2:
+  callq _sr2
+  movl $2, %eax
+  ret
+
+### Fold: Mutually-recursive functions with symmetric bodies
+
+.globl _mr1
+.p2align 4, 0x90
+_mr1:
+  callq _mr1 # call myself
+  callq _mr2 # call my twin
+  movl $1, %eax
+  ret
+
+.globl _mr2
+.p2align 4, 0x90
+_mr2:
+  callq _mr2 # call myself
+  callq _mr1 # call my twin
+  movl $1, %eax
+  ret
+
+### Fold: Mutually-recursive functions with identical bodies
+###
+### FIXME: This test is currently broken. Recursive call sites have no relocs
+### and the non-zero displacement field is already written to the section
+### data, while non-recursive call sites use symbol relocs and section data
+### contains zeros in the displacement field. Thus, ICF's equalsConstant()
+### finds that the section data doesn't match.
+###
+### ELF folds this case properly because it emits symbol relocs for all calls,
+### even recursive ones.
+
+.globl _xr1
+.p2align 4, 0x90
+_xr1:
+  callq _xr1 # call myself
+  callq _xr2 # call my twin
+  movl $3, %eax
+  ret
+
+.globl _xr2
+.p2align 4, 0x90
+_xr2:
+  callq _xr1 # call my twin
+  callq _xr2 # call myself
+  movl $3, %eax
+  ret
+
+###
+
+.globl _main
+.p2align 4, 0x90
+_main:
+  callq _a1
+  callq _a2
+  callq _c
+  callq _d
+  callq _e
+  callq _f
+  callq _g
+  callq _sr1
+  callq _sr2
+  callq _mr1
+  callq _mr2
+  callq _xr1
+  callq _xr2
+  ret